# Ensemble Methods

Intel-accelerated ensemble algorithms, including Random Forest and Extra Trees, for both classification and regression. These implementations provide significant performance improvements through optimized tree construction and parallel processing.

## Capabilities

### Random Forest Classifier

Intel-optimized Random Forest for classification with accelerated tree building and prediction.

```python { .api }
class RandomForestClassifier:
    """
    Random Forest classifier with Intel optimization.

    Ensemble of decision trees with optimized parallel tree construction
    and Intel hardware acceleration for improved performance.

    This is an API stub: method bodies are omitted; only the signatures,
    default values, and documented attributes are shown.
    """

    def __init__(
        self,
        n_estimators=100,
        criterion='gini',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features='sqrt',
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=True,
        oob_score=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        class_weight=None,
        ccp_alpha=0.0,
        max_samples=None
    ):
        """Initialize Random Forest Classifier with Intel optimization."""

    def fit(self, X, y, sample_weight=None):
        """
        Build forest of trees from training set.

        Parameters:
            X (array-like): Training data
            y (array-like): Target values
            sample_weight (array-like): Sample weights

        Returns:
            self: Fitted estimator
        """

    def predict(self, X):
        """
        Predict class for samples.

        Parameters:
            X (array-like): Samples to classify
        """

    def predict_proba(self, X):
        """
        Predict class probabilities.

        Parameters:
            X (array-like): Samples to classify
        """

    def predict_log_proba(self, X):
        """
        Predict class log-probabilities.

        Parameters:
            X (array-like): Samples to classify
        """

    def score(self, X, y, sample_weight=None):
        """
        Return mean accuracy.

        Parameters:
            X (array-like): Samples to score
            y (array-like): True labels for X
            sample_weight (array-like): Sample weights
        """

    # Attributes
    estimators_: ...  # Collection of fitted sub-estimators
    classes_: ...  # Class labels
    n_classes_: ...  # Number of classes
    feature_importances_: ...  # Feature importances
    n_features_in_: ...  # Number of features
    oob_score_: ...  # Out-of-bag score
```

### Random Forest Regressor

Intel-optimized Random Forest for regression tasks.

```python { .api }
class RandomForestRegressor:
    """
    Random Forest regressor with Intel optimization.

    Ensemble of decision trees optimized for regression with
    Intel hardware acceleration.

    This is an API stub: method bodies are omitted; only the signatures,
    default values, and documented attributes are shown.
    """

    def __init__(
        self,
        n_estimators=100,
        criterion='squared_error',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features=1.0,
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=True,
        oob_score=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        ccp_alpha=0.0,
        max_samples=None
    ):
        """Initialize Random Forest Regressor with Intel optimization."""

    def fit(self, X, y, sample_weight=None):
        """
        Build forest of trees.

        Parameters:
            X (array-like): Training data
            y (array-like): Target values
            sample_weight (array-like): Sample weights
        """

    def predict(self, X):
        """
        Predict regression target.

        Parameters:
            X (array-like): Samples to predict
        """

    def score(self, X, y, sample_weight=None):
        """
        Return R² score.

        Parameters:
            X (array-like): Samples to score
            y (array-like): True target values for X
            sample_weight (array-like): Sample weights
        """

    # Attributes
    estimators_: ...  # Collection of fitted sub-estimators
    feature_importances_: ...  # Feature importances
    n_features_in_: ...  # Number of features
    oob_score_: ...  # Out-of-bag score
```

### Extra Trees Classifier

Extremely Randomized Trees classifier with Intel optimization.

```python { .api }
class ExtraTreesClassifier:
    """
    Extra Trees classifier with Intel optimization.

    Ensemble method using extremely randomized trees with
    optimized tree construction algorithms.

    This is an API stub: method bodies are omitted; only the signatures
    and default values are shown.
    """

    def __init__(
        self,
        n_estimators=100,
        criterion='gini',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features='sqrt',
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=False,
        oob_score=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        class_weight=None,
        ccp_alpha=0.0,
        max_samples=None
    ):
        """Initialize Extra Trees Classifier."""

    def fit(self, X, y, sample_weight=None):
        """
        Build forest of extremely randomized trees.

        Parameters:
            X (array-like): Training data
            y (array-like): Target values
            sample_weight (array-like): Sample weights
        """

    def predict(self, X):
        """
        Predict class for samples.

        Parameters:
            X (array-like): Samples to classify
        """

    def predict_proba(self, X):
        """
        Predict class probabilities.

        Parameters:
            X (array-like): Samples to classify
        """

    # Attributes similar to RandomForestClassifier
```

### Extra Trees Regressor

Extremely Randomized Trees regressor with Intel optimization.

```python { .api }
class ExtraTreesRegressor:
    """
    Extra Trees regressor with Intel optimization.

    Regression ensemble using extremely randomized trees
    with Intel hardware acceleration.

    This is an API stub: method bodies are omitted; only the signatures
    and default values are shown.
    """

    def __init__(
        self,
        n_estimators=100,
        criterion='squared_error',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features=1.0,
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=False,
        oob_score=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        ccp_alpha=0.0,
        max_samples=None
    ):
        """Initialize Extra Trees Regressor."""

    def fit(self, X, y, sample_weight=None):
        """
        Build forest of extremely randomized trees.

        Parameters:
            X (array-like): Training data
            y (array-like): Target values
            sample_weight (array-like): Sample weights
        """

    def predict(self, X):
        """
        Predict regression target.

        Parameters:
            X (array-like): Samples to predict
        """

    # Attributes similar to RandomForestRegressor
```

## Usage Examples

### Random Forest Classification

```python
import numpy as np
from sklearnex.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Generate classification dataset
# (10 informative + 10 redundant features account for all 20 features)
X, y = make_classification(
    n_samples=1000, n_features=20, n_informative=10,
    n_redundant=10, n_classes=3, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train Random Forest
rf = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    random_state=42,
    n_jobs=-1
)
rf.fit(X_train, y_train)

# Make predictions
y_pred = rf.predict(X_test)
y_proba = rf.predict_proba(X_test)
accuracy = rf.score(X_test, y_test)

print(f"Accuracy: {accuracy:.3f}")
print(f"Number of trees: {len(rf.estimators_)}")
print(f"Feature importances shape: {rf.feature_importances_.shape}")

# Top 5 most important features
# argsort is ascending, so take the last five indices and reverse them
# to list the features from most to least important.
feature_importance = rf.feature_importances_
top_features = np.argsort(feature_importance)[-5:][::-1]
print(f"Top 5 features: {top_features}")
```

### Random Forest Regression

```python
import numpy as np
from sklearnex.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

# Generate regression dataset
X, y = make_regression(
    n_samples=1000, n_features=15, noise=0.1, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train Random Forest Regressor
# oob_score=True is what makes `oob_score_` available after fitting below.
rf_reg = RandomForestRegressor(
    n_estimators=100,
    max_depth=15,
    min_samples_split=5,
    random_state=42,
    oob_score=True
)
rf_reg.fit(X_train, y_train)

# Evaluate model
y_pred = rf_reg.predict(X_test)
r2_score = rf_reg.score(X_test, y_test)
oob_score = rf_reg.oob_score_

print(f"R² Score: {r2_score:.3f}")
print(f"Out-of-bag Score: {oob_score:.3f}")
print(f"Feature importances sum: {rf_reg.feature_importances_.sum():.3f}")
```

### Comparing Ensemble Methods

```python
import time
import numpy as np
from sklearnex.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score

# Generate dataset
X, y = make_classification(
    n_samples=2000, n_features=30, n_informative=15,
    n_classes=4, random_state=42
)

# Compare Random Forest vs Extra Trees
models = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Extra Trees': ExtraTreesClassifier(n_estimators=100, random_state=42)
}

for name, model in models.items():
    # perf_counter() is the clock intended for measuring elapsed intervals;
    # time.time() can jump if the system clock is adjusted.
    start_time = time.perf_counter()

    # Cross-validation
    scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')

    # This interval covers the whole 5-fold cross-validation (fitting and
    # scoring on every fold), so it is reported as cross-validation time.
    cv_time = time.perf_counter() - start_time

    print(f"{name}:")
    print(f" Mean CV Accuracy: {scores.mean():.3f} (+/- {scores.std() * 2:.3f})")
    print(f" Cross-validation Time: {cv_time:.2f} seconds")

    # Fit for feature importance analysis
    model.fit(X, y)
    print(f" Feature Importance Range: {model.feature_importances_.min():.4f} - {model.feature_importances_.max():.4f}")
    print()
```

### Performance Comparison with Standard Scikit-learn

```python
import time
import numpy as np
from sklearn.datasets import make_classification

# Generate large dataset
X, y = make_classification(
    n_samples=10000, n_features=50, n_informative=25,
    n_classes=5, random_state=42
)

# Intel-optimized Random Forest
from sklearnex.ensemble import RandomForestClassifier as IntelRF

# perf_counter() is the clock intended for measuring elapsed intervals;
# time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
intel_rf = IntelRF(n_estimators=100, random_state=42, n_jobs=-1)
intel_rf.fit(X, y)
intel_time = time.perf_counter() - start_time
# Scored on the same data the model was fitted on, so this is training
# accuracy, not an estimate of generalization performance.
intel_accuracy = intel_rf.score(X, y)

print("Intel Random Forest:")
print(f" Training Time: {intel_time:.2f} seconds")
print(f" Training Accuracy: {intel_accuracy:.3f}")

# Standard scikit-learn Random Forest (for comparison)
from sklearn.ensemble import RandomForestClassifier as StandardRF

start_time = time.perf_counter()
standard_rf = StandardRF(n_estimators=100, random_state=42, n_jobs=-1)
standard_rf.fit(X, y)
standard_time = time.perf_counter() - start_time
standard_accuracy = standard_rf.score(X, y)

print("\nStandard Random Forest:")
print(f" Training Time: {standard_time:.2f} seconds")
print(f" Training Accuracy: {standard_accuracy:.3f}")
print(f" Speedup: {standard_time / intel_time:.1f}x")
```

## Performance Notes

- Significant speedups on datasets with >1000 samples and >10 features
- Tree construction is highly optimized with Intel acceleration
- Parallel processing scales well with available CPU cores
- Memory usage is comparable to standard scikit-learn implementations
- Feature importance calculations are accelerated
- Out-of-bag scoring benefits from optimization when enabled