Intel Extension for Scikit-learn providing hardware-accelerated implementations of scikit-learn algorithms optimized for Intel CPUs and GPUs.
---
Intel-accelerated ensemble algorithms including Random Forest and Extra Trees for both classification and regression. These implementations provide significant performance improvements through optimized tree construction and parallel processing.
Intel-optimized Random Forest for classification with accelerated tree building and prediction.
class RandomForestClassifier:
    """
    Random Forest classifier with Intel optimization.

    Ensemble of decision trees with optimized parallel tree construction
    and Intel hardware acceleration for improved performance.
    Mirrors the scikit-learn estimator interface (fit/predict/score).
    """

    def __init__(
        self,
        n_estimators=100,              # number of trees in the forest
        criterion='gini',              # split-quality measure
        max_depth=None,                # None -> no explicit depth limit
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features='sqrt',           # features considered per split
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=True,                # resample rows per tree
        oob_score=False,               # when True, oob_score_ is populated after fit
        n_jobs=None,                   # parallelism; examples below pass -1
        random_state=None,
        verbose=0,
        warm_start=False,
        class_weight=None,
        ccp_alpha=0.0,                 # cost-complexity pruning strength
        max_samples=None
    ):
        """Initialize Random Forest Classifier with Intel optimization."""

    def fit(self, X, y, sample_weight=None):
        """
        Build forest of trees from training set.

        Parameters:
            X (array-like): Training data
            y (array-like): Target values
            sample_weight (array-like): Sample weights

        Returns:
            self: Fitted estimator
        """

    def predict(self, X):
        """Predict class for samples."""

    def predict_proba(self, X):
        """Predict class probabilities."""

    def predict_log_proba(self, X):
        """Predict class log-probabilities."""

    def score(self, X, y, sample_weight=None):
        """Return mean accuracy."""

    # Attributes
    estimators_: ...           # Collection of fitted sub-estimators
    classes_: ...              # Class labels
    n_classes_: ...            # Number of classes
    feature_importances_: ...  # Feature importances
    n_features_in_: ...        # Number of features
    oob_score_: ...            # Out-of-bag score
class RandomForestRegressor:
    """
    Random Forest regressor with Intel optimization.

    Ensemble of decision trees optimized for regression with
    Intel hardware acceleration.
    Mirrors the scikit-learn estimator interface (fit/predict/score).
    """

    def __init__(
        self,
        n_estimators=100,               # number of trees in the forest
        criterion='squared_error',      # split-quality measure for regression
        max_depth=None,                 # None -> no explicit depth limit
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features=1.0,               # fraction of features considered per split
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=True,                 # resample rows per tree
        oob_score=False,                # when True, oob_score_ is populated after fit
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        ccp_alpha=0.0,                  # cost-complexity pruning strength
        max_samples=None
    ):
        """Initialize Random Forest Regressor with Intel optimization."""

    def fit(self, X, y, sample_weight=None):
        """
        Build forest of trees.

        Parameters:
            X (array-like): Training data
            y (array-like): Target values
            sample_weight (array-like): Sample weights

        Returns:
            self: Fitted estimator
        """

    def predict(self, X):
        """Predict regression target."""

    def score(self, X, y, sample_weight=None):
        """Return R² score."""

    # Attributes
    estimators_: ...           # Collection of fitted sub-estimators
    feature_importances_: ...  # Feature importances
    n_features_in_: ...        # Number of features
    oob_score_: ...            # Out-of-bag score (fixed: next section's header was fused into this line)
class ExtraTreesClassifier:
    """
    Extra Trees classifier with Intel optimization.

    Ensemble method using extremely randomized trees with
    optimized tree construction algorithms.
    """

    def __init__(
        self,
        n_estimators=100,
        criterion='gini',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features='sqrt',
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=False,          # note: default differs from RandomForestClassifier (True there)
        oob_score=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        class_weight=None,
        ccp_alpha=0.0,
        max_samples=None
    ):
        """Initialize Extra Trees Classifier."""

    def fit(self, X, y, sample_weight=None):
        """Build forest of extremely randomized trees."""

    def predict(self, X):
        """Predict class for samples."""

    def predict_proba(self, X):
        """Predict class probabilities."""

    # Attributes similar to RandomForestClassifier
class ExtraTreesRegressor:
    """
    Extra Trees regressor with Intel optimization.

    Regression ensemble using extremely randomized trees
    with Intel hardware acceleration.
    """

    def __init__(
        self,
        n_estimators=100,
        criterion='squared_error',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features=1.0,
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=False,          # note: default differs from RandomForestRegressor (True there)
        oob_score=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        ccp_alpha=0.0,
        max_samples=None
    ):
        """Initialize Extra Trees Regressor."""

    def fit(self, X, y, sample_weight=None):
        """Build forest of extremely randomized trees."""

    def predict(self, X):
        """Predict regression target."""

    # Attributes similar to RandomForestRegressor
# Example: Intel-accelerated Random Forest classification.
import numpy as np  # fixed: this import was garbled into the previous section's comment; needed for np.argsort below
from sklearnex.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Generate classification dataset
X, y = make_classification(
    n_samples=1000, n_features=20, n_informative=10,
    n_redundant=10, n_classes=3, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train Random Forest
rf = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    random_state=42,
    n_jobs=-1
)
rf.fit(X_train, y_train)

# Make predictions
y_pred = rf.predict(X_test)
y_proba = rf.predict_proba(X_test)
accuracy = rf.score(X_test, y_test)
print(f"Accuracy: {accuracy:.3f}")
print(f"Number of trees: {len(rf.estimators_)}")
print(f"Feature importances shape: {rf.feature_importances_.shape}")

# Top 5 most important features: argsort is ascending, so take the last 5 and reverse.
feature_importance = rf.feature_importances_
top_features = np.argsort(feature_importance)[-5:][::-1]
print(f"Top 5 features: {top_features}")
# Example: Intel-accelerated Random Forest regression with out-of-bag scoring.
from sklearnex.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

# Generate regression dataset
X, y = make_regression(
    n_samples=1000, n_features=15, noise=0.1, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train Random Forest Regressor
rf_reg = RandomForestRegressor(
    n_estimators=100,
    max_depth=15,
    min_samples_split=5,
    random_state=42,
    oob_score=True  # required so oob_score_ is available below
)
rf_reg.fit(X_train, y_train)

# Evaluate model
y_pred = rf_reg.predict(X_test)
r2_score = rf_reg.score(X_test, y_test)
oob_score = rf_reg.oob_score_
print(f"R² Score: {r2_score:.3f}")
print(f"Out-of-bag Score: {oob_score:.3f}")
# fixed: the next snippet's "import time" was fused onto the end of this line
print(f"Feature importances sum: {rf_reg.feature_importances_.sum():.3f}")
# Example: compare Random Forest vs Extra Trees via 5-fold cross-validation.
import time  # fixed: this import was garbled onto the previous snippet's last line; needed for time.time() below
import numpy as np
from sklearnex.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score

# Generate dataset
X, y = make_classification(
    n_samples=2000, n_features=30, n_informative=15,
    n_classes=4, random_state=42
)

# Compare Random Forest vs Extra Trees
models = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Extra Trees': ExtraTreesClassifier(n_estimators=100, random_state=42)
}
for name, model in models.items():
    start_time = time.time()
    # Cross-validation
    scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
    fit_time = time.time() - start_time
    print(f"{name}:")
    print(f" Mean CV Accuracy: {scores.mean():.3f} (+/- {scores.std() * 2:.3f})")
    print(f" Training Time: {fit_time:.2f} seconds")
    # Fit on the full data for feature importance analysis
    model.fit(X, y)
    print(f" Feature Importance Range: {model.feature_importances_.min():.4f} - {model.feature_importances_.max():.4f}")
    print()
# Example: benchmark Intel-optimized vs stock scikit-learn Random Forest.
import time  # fixed: this import was garbled onto the previous snippet's last line; needed for time.time() below
import numpy as np
from sklearn.datasets import make_classification

# Generate large dataset
X, y = make_classification(
    n_samples=10000, n_features=50, n_informative=25,
    n_classes=5, random_state=42
)

# Intel-optimized Random Forest
from sklearnex.ensemble import RandomForestClassifier as IntelRF
start_time = time.time()
intel_rf = IntelRF(n_estimators=100, random_state=42, n_jobs=-1)
intel_rf.fit(X, y)
intel_time = time.time() - start_time
# NOTE: scored on the training data — this measures fit speed, not generalization.
intel_accuracy = intel_rf.score(X, y)
print(f"Intel Random Forest:")
print(f" Training Time: {intel_time:.2f} seconds")
print(f" Accuracy: {intel_accuracy:.3f}")

# Standard scikit-learn Random Forest (for comparison)
from sklearn.ensemble import RandomForestClassifier as StandardRF
start_time = time.time()
standard_rf = StandardRF(n_estimators=100, random_state=42, n_jobs=-1)
standard_rf.fit(X, y)
standard_time = time.time() - start_time
standard_accuracy = standard_rf.score(X, y)
print(f"\nStandard Random Forest:")
print(f" Training Time: {standard_time:.2f} seconds")
print(f" Accuracy: {standard_accuracy:.3f}")
# fixed: the trailing "Install with Tessl CLI" header was fused onto this line
print(f" Speedup: {standard_time / intel_time:.1f}x")
Install with the Tessl CLI: `npx tessl i tessl/pypi-scikit-learn-intelex`