Tessl Tile for pypi/scikit-learn-intelex@2024.7.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

advanced.md clustering.md daal4py-mb.md decomposition.md ensemble.md index.md linear-models.md metrics-model-selection.md neighbors.md patching-config.md stats-manifold.md svm.md

ensemble.mddocs/

0
# Ensemble Methods
1

2
Intel-accelerated ensemble algorithms including Random Forest and Extra Trees for both classification and regression. These implementations provide significant performance improvements through optimized tree construction and parallel processing.
3

4
## Capabilities
5

6
### Random Forest Classifier
7

8
Intel-optimized Random Forest for classification with accelerated tree building and prediction.
9

10
```python { .api }
11
class RandomForestClassifier:
12
    """
13
    Random Forest classifier with Intel optimization.
14
    
15
    Ensemble of decision trees with optimized parallel tree construction
16
    and Intel hardware acceleration for improved performance.
17
    """
18
    
19
    def __init__(
20
        self,
21
        n_estimators=100,
22
        criterion='gini',
23
        max_depth=None,
24
        min_samples_split=2,
25
        min_samples_leaf=1,
26
        min_weight_fraction_leaf=0.0,
27
        max_features='sqrt',
28
        max_leaf_nodes=None,
29
        min_impurity_decrease=0.0,
30
        bootstrap=True,
31
        oob_score=False,
32
        n_jobs=None,
33
        random_state=None,
34
        verbose=0,
35
        warm_start=False,
36
        class_weight=None,
37
        ccp_alpha=0.0,
38
        max_samples=None
39
    ):
40
        """Initialize Random Forest Classifier with Intel optimization."""
41
    
42
    def fit(self, X, y, sample_weight=None):
43
        """
44
        Build forest of trees from training set.
45
        
46
        Parameters:
47
            X (array-like): Training data
48
            y (array-like): Target values
49
            sample_weight (array-like): Sample weights
50
            
51
        Returns:
52
            self: Fitted estimator
53
        """
54
    
55
    def predict(self, X):
56
        """Predict class for samples."""
57
    
58
    def predict_proba(self, X):
59
        """Predict class probabilities."""
60
    
61
    def predict_log_proba(self, X):
62
        """Predict class log-probabilities."""
63
    
64
    def score(self, X, y, sample_weight=None):
65
        """Return mean accuracy."""
66
    
67
    # Attributes
68
    estimators_: ...         # Collection of fitted sub-estimators
69
    classes_: ...           # Class labels
70
    n_classes_: ...         # Number of classes
71
    feature_importances_: ... # Feature importances
72
    n_features_in_: ...     # Number of features
73
    oob_score_: ...         # Out-of-bag score
74
```
75

76
### Random Forest Regressor
77

78
Intel-optimized Random Forest for regression tasks.
79

80
```python { .api }
81
class RandomForestRegressor:
82
    """
83
    Random Forest regressor with Intel optimization.
84
    
85
    Ensemble of decision trees optimized for regression with
86
    Intel hardware acceleration.
87
    """
88
    
89
    def __init__(
90
        self,
91
        n_estimators=100,
92
        criterion='squared_error',
93
        max_depth=None,
94
        min_samples_split=2,
95
        min_samples_leaf=1,
96
        min_weight_fraction_leaf=0.0,
97
        max_features=1.0,
98
        max_leaf_nodes=None,
99
        min_impurity_decrease=0.0,
100
        bootstrap=True,
101
        oob_score=False,
102
        n_jobs=None,
103
        random_state=None,
104
        verbose=0,
105
        warm_start=False,
106
        ccp_alpha=0.0,
107
        max_samples=None
108
    ):
109
        """Initialize Random Forest Regressor with Intel optimization."""
110
    
111
    def fit(self, X, y, sample_weight=None):
112
        """Build forest of trees."""
113
    
114
    def predict(self, X):
115
        """Predict regression target."""
116
    
117
    def score(self, X, y, sample_weight=None):
118
        """Return R² score."""
119
    
120
    # Attributes
121
    estimators_: ...
122
    feature_importances_: ...
123
    n_features_in_: ...
124
    oob_score_: ...
125
```
126

127
### Extra Trees Classifier
128

129
Extremely Randomized Trees classifier with Intel optimization.
130

131
```python { .api }
132
class ExtraTreesClassifier:
133
    """
134
    Extra Trees classifier with Intel optimization.
135
    
136
    Ensemble method using extremely randomized trees with
137
    optimized tree construction algorithms.
138
    """
139
    
140
    def __init__(
141
        self,
142
        n_estimators=100,
143
        criterion='gini',
144
        max_depth=None,
145
        min_samples_split=2,
146
        min_samples_leaf=1,
147
        min_weight_fraction_leaf=0.0,
148
        max_features='sqrt',
149
        max_leaf_nodes=None,
150
        min_impurity_decrease=0.0,
151
        bootstrap=False,
152
        oob_score=False,
153
        n_jobs=None,
154
        random_state=None,
155
        verbose=0,
156
        warm_start=False,
157
        class_weight=None,
158
        ccp_alpha=0.0,
159
        max_samples=None
160
    ):
161
        """Initialize Extra Trees Classifier."""
162
    
163
    def fit(self, X, y, sample_weight=None):
164
        """Build forest of extremely randomized trees."""
165
    
166
    def predict(self, X):
167
        """Predict class for samples."""
168
    
169
    def predict_proba(self, X):
170
        """Predict class probabilities."""
171
    
172
    # Attributes similar to RandomForestClassifier
173
```
174

175
### Extra Trees Regressor
176

177
Extremely Randomized Trees regressor with Intel optimization.
178

179
```python { .api }
180
class ExtraTreesRegressor:
181
    """
182
    Extra Trees regressor with Intel optimization.
183
    
184
    Regression ensemble using extremely randomized trees
185
    with Intel hardware acceleration.
186
    """
187
    
188
    def __init__(
189
        self,
190
        n_estimators=100,
191
        criterion='squared_error',
192
        max_depth=None,
193
        min_samples_split=2,
194
        min_samples_leaf=1,
195
        min_weight_fraction_leaf=0.0,
196
        max_features=1.0,
197
        max_leaf_nodes=None,
198
        min_impurity_decrease=0.0,
199
        bootstrap=False,
200
        oob_score=False,
201
        n_jobs=None,
202
        random_state=None,
203
        verbose=0,
204
        warm_start=False,
205
        ccp_alpha=0.0,
206
        max_samples=None
207
    ):
208
        """Initialize Extra Trees Regressor."""
209
    
210
    def fit(self, X, y, sample_weight=None):
211
        """Build forest of extremely randomized trees."""
212
    
213
    def predict(self, X):
214
        """Predict regression target."""
215
    
216
    # Attributes similar to RandomForestRegressor
217
```
218

219
## Usage Examples
220

221
### Random Forest Classification
222

223
```python
224
import numpy as np
225
from sklearnex.ensemble import RandomForestClassifier
226
from sklearn.datasets import make_classification
227
from sklearn.model_selection import train_test_split
228

229
# Generate classification dataset
230
X, y = make_classification(
231
    n_samples=1000, n_features=20, n_informative=10, 
232
    n_redundant=10, n_classes=3, random_state=42
233
)
234
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
235

236
# Create and train Random Forest
237
rf = RandomForestClassifier(
238
    n_estimators=100, 
239
    max_depth=10, 
240
    random_state=42,
241
    n_jobs=-1
242
)
243
rf.fit(X_train, y_train)
244

245
# Make predictions
246
y_pred = rf.predict(X_test)
247
y_proba = rf.predict_proba(X_test)
248
accuracy = rf.score(X_test, y_test)
249

250
print(f"Accuracy: {accuracy:.3f}")
251
print(f"Number of trees: {len(rf.estimators_)}")
252
print(f"Feature importances shape: {rf.feature_importances_.shape}")
253

254
# Top 5 most important features
255
feature_importance = rf.feature_importances_
256
top_features = np.argsort(feature_importance)[-5:][::-1]
257
print(f"Top 5 features: {top_features}")
258
```
259

260
### Random Forest Regression
261

262
```python
263
import numpy as np
264
from sklearnex.ensemble import RandomForestRegressor
265
from sklearn.datasets import make_regression
266
from sklearn.model_selection import train_test_split
267

268
# Generate regression dataset
269
X, y = make_regression(
270
    n_samples=1000, n_features=15, noise=0.1, random_state=42
271
)
272
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
273

274
# Create and train Random Forest Regressor
275
rf_reg = RandomForestRegressor(
276
    n_estimators=100,
277
    max_depth=15,
278
    min_samples_split=5,
279
    random_state=42,
280
    oob_score=True
281
)
282
rf_reg.fit(X_train, y_train)
283

284
# Evaluate model
285
y_pred = rf_reg.predict(X_test)
286
r2_score = rf_reg.score(X_test, y_test)
287
oob_score = rf_reg.oob_score_
288

289
print(f"R² Score: {r2_score:.3f}")
290
print(f"Out-of-bag Score: {oob_score:.3f}")
291
print(f"Feature importances sum: {rf_reg.feature_importances_.sum():.3f}")
292
```
293

294
### Comparing Ensemble Methods
295

296
```python
297
import time
298
import numpy as np
299
from sklearnex.ensemble import RandomForestClassifier, ExtraTreesClassifier
300
from sklearn.datasets import make_classification
301
from sklearn.model_selection import cross_val_score
302

303
# Generate dataset
304
X, y = make_classification(
305
    n_samples=2000, n_features=30, n_informative=15,
306
    n_classes=4, random_state=42
307
)
308

309
# Compare Random Forest vs Extra Trees
310
models = {
311
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
312
    'Extra Trees': ExtraTreesClassifier(n_estimators=100, random_state=42)
313
}
314

315
for name, model in models.items():
316
    start_time = time.time()
317
    
318
    # Cross-validation
319
    scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
320
    
321
    fit_time = time.time() - start_time
322
    
323
    print(f"{name}:")
324
    print(f"  Mean CV Accuracy: {scores.mean():.3f} (+/- {scores.std() * 2:.3f})")
325
    print(f"  Training Time: {fit_time:.2f} seconds")
326
    
327
    # Fit for feature importance analysis
328
    model.fit(X, y)
329
    print(f"  Feature Importance Range: {model.feature_importances_.min():.4f} - {model.feature_importances_.max():.4f}")
330
    print()
331
```
332

333
### Performance Comparison with Standard Scikit-learn
334

335
```python
336
import time
337
import numpy as np
338
from sklearn.datasets import make_classification
339

340
# Generate large dataset
341
X, y = make_classification(
342
    n_samples=10000, n_features=50, n_informative=25,
343
    n_classes=5, random_state=42
344
)
345

346
# Intel-optimized Random Forest
347
from sklearnex.ensemble import RandomForestClassifier as IntelRF
348

349
start_time = time.time()
350
intel_rf = IntelRF(n_estimators=100, random_state=42, n_jobs=-1)
351
intel_rf.fit(X, y)
352
intel_time = time.time() - start_time
353
intel_accuracy = intel_rf.score(X, y)
354

355
print(f"Intel Random Forest:")
356
print(f"  Training Time: {intel_time:.2f} seconds")
357
print(f"  Accuracy: {intel_accuracy:.3f}")
358

359
# Standard scikit-learn Random Forest (for comparison)
360
from sklearn.ensemble import RandomForestClassifier as StandardRF
361

362
start_time = time.time()
363
standard_rf = StandardRF(n_estimators=100, random_state=42, n_jobs=-1)
364
standard_rf.fit(X, y)
365
standard_time = time.time() - start_time
366
standard_accuracy = standard_rf.score(X, y)
367

368
print(f"\nStandard Random Forest:")
369
print(f"  Training Time: {standard_time:.2f} seconds")
370
print(f"  Accuracy: {standard_accuracy:.3f}")
371
print(f"  Speedup: {standard_time / intel_time:.1f}x")
372
```
373

374
## Performance Notes
375

376
- Significant speedups are expected on datasets with more than 1,000 samples and more than 10 features
377
- Tree construction is highly optimized with Intel acceleration
378
- Parallel processing scales well with available CPU cores
379
- Memory usage is comparable to that of the standard scikit-learn implementations
380
- Feature importance calculations are accelerated
381
- When enabled (`oob_score=True`), out-of-bag scoring also benefits from the optimized implementation

Version

Tile

Files

ensemble.md docs/

ensemble.mddocs/