CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-scikit-learn-intelex

Intel Extension for Scikit-learn providing hardware-accelerated implementations of scikit-learn algorithms optimized for Intel CPUs and GPUs.

Pending
Overview
Eval results
Files

docs/ensemble.md

Ensemble Methods

Intel-accelerated ensemble algorithms including Random Forest and Extra Trees for both classification and regression. These implementations provide significant performance improvements through optimized tree construction and parallel processing.

Capabilities

Random Forest Classifier

Intel-optimized Random Forest for classification with accelerated tree building and prediction.

class RandomForestClassifier:
    """
    Random Forest classifier with Intel optimization.
    
    Ensemble of decision trees with optimized parallel tree construction
    and Intel hardware acceleration for improved performance.

    API stub: signatures and defaults mirror
    ``sklearn.ensemble.RandomForestClassifier`` so it can act as a
    drop-in replacement.
    """
    
    def __init__(
        self,
        n_estimators=100,
        criterion='gini',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features='sqrt',
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=True,
        oob_score=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        class_weight=None,
        ccp_alpha=0.0,
        max_samples=None
    ):
        """
        Initialize Random Forest Classifier with Intel optimization.

        Key parameters (semantics follow the scikit-learn estimator of
        the same name):
            n_estimators: Number of trees in the forest.
            criterion: Split-quality measure (default 'gini').
            max_depth: Maximum tree depth; None grows trees until leaves
                satisfy the min_samples_* constraints.
            bootstrap: Build each tree on a bootstrap sample when True.
            oob_score: Estimate generalization accuracy from out-of-bag
                samples (in sklearn this requires bootstrap=True —
                TODO confirm for the Intel implementation).
            n_jobs: Parallelism for fit/predict; -1 uses all cores.
            random_state: Seed for bootstrapping and feature sampling,
                for reproducible forests.
        """
    
    def fit(self, X, y, sample_weight=None):
        """
        Build forest of trees from training set.
        
        Parameters:
            X (array-like): Training data
            y (array-like): Target values
            sample_weight (array-like): Sample weights
            
        Returns:
            self: Fitted estimator
        """
    
    def predict(self, X):
        """Predict class for each sample in X."""
    
    def predict_proba(self, X):
        """Predict per-class probabilities for each sample in X."""
    
    def predict_log_proba(self, X):
        """Predict per-class log-probabilities for each sample in X."""
    
    def score(self, X, y, sample_weight=None):
        """Return mean accuracy of predict(X) with respect to y."""
    
    # Fitted attributes (available after fit, per the sklearn API)
    estimators_: ...         # Collection of fitted sub-estimators
    classes_: ...           # Class labels
    n_classes_: ...         # Number of classes
    feature_importances_: ... # Feature importances
    n_features_in_: ...     # Number of features
    oob_score_: ...         # Out-of-bag score (set when oob_score=True)

Random Forest Regressor

Intel-optimized Random Forest for regression tasks.

class RandomForestRegressor:
    """
    Random Forest regressor with Intel optimization.
    
    Ensemble of decision trees optimized for regression with
    Intel hardware acceleration.

    API stub: signatures and defaults mirror
    ``sklearn.ensemble.RandomForestRegressor``. Differs from the
    classifier stub above in its regression defaults
    (criterion='squared_error', max_features=1.0) and the absence of
    class_weight.
    """
    
    def __init__(
        self,
        n_estimators=100,
        criterion='squared_error',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features=1.0,
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=True,
        oob_score=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        ccp_alpha=0.0,
        max_samples=None
    ):
        """
        Initialize Random Forest Regressor with Intel optimization.

        Parameter semantics follow scikit-learn's estimator of the same
        name; see the classifier docs for the shared parameters.
        """
    
    def fit(self, X, y, sample_weight=None):
        """Build forest of trees from (X, y); returns self."""
    
    def predict(self, X):
        """Predict regression target for each sample in X."""
    
    def score(self, X, y, sample_weight=None):
        """Return R² score of predict(X) with respect to y."""
    
    # Fitted attributes (available after fit, per the sklearn API)
    estimators_: ...          # Collection of fitted sub-estimators
    feature_importances_: ... # Feature importances
    n_features_in_: ...       # Number of features
    oob_score_: ...           # Out-of-bag score (set when oob_score=True)

Extra Trees Classifier

Extremely Randomized Trees classifier with Intel optimization.

class ExtraTreesClassifier:
    """
    Extra Trees classifier with Intel optimization.
    
    Ensemble method using extremely randomized trees with
    optimized tree construction algorithms.

    API stub: signatures and defaults mirror
    ``sklearn.ensemble.ExtraTreesClassifier``. Note bootstrap defaults
    to False here, unlike RandomForestClassifier.
    """
    
    def __init__(
        self,
        n_estimators=100,
        criterion='gini',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features='sqrt',
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=False,
        oob_score=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        class_weight=None,
        ccp_alpha=0.0,
        max_samples=None
    ):
        """
        Initialize Extra Trees Classifier.

        Parameters match RandomForestClassifier except for the
        bootstrap=False default; semantics follow scikit-learn.
        """
    
    def fit(self, X, y, sample_weight=None):
        """Build forest of extremely randomized trees; returns self."""
    
    def predict(self, X):
        """Predict class for each sample in X."""
    
    def predict_proba(self, X):
        """Predict per-class probabilities for each sample in X."""
    
    # Fitted attributes follow RandomForestClassifier (estimators_,
    # classes_, n_classes_, feature_importances_, n_features_in_, ...)

Extra Trees Regressor

Extremely Randomized Trees regressor with Intel optimization.

class ExtraTreesRegressor:
    """
    Extra Trees regressor with Intel optimization.
    
    Regression ensemble using extremely randomized trees
    with Intel hardware acceleration.

    API stub: signatures and defaults mirror
    ``sklearn.ensemble.ExtraTreesRegressor``. Note bootstrap defaults
    to False here, unlike RandomForestRegressor.
    """
    
    def __init__(
        self,
        n_estimators=100,
        criterion='squared_error',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features=1.0,
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=False,
        oob_score=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        ccp_alpha=0.0,
        max_samples=None
    ):
        """
        Initialize Extra Trees Regressor.

        Parameters match RandomForestRegressor except for the
        bootstrap=False default; semantics follow scikit-learn.
        """
    
    def fit(self, X, y, sample_weight=None):
        """Build forest of extremely randomized trees; returns self."""
    
    def predict(self, X):
        """Predict regression target for each sample in X."""
    
    # Fitted attributes follow RandomForestRegressor (estimators_,
    # feature_importances_, n_features_in_, ...)

Usage Examples

Random Forest Classification

import numpy as np
from sklearnex.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Build a synthetic 3-class problem and hold out 20% for testing
X, y = make_classification(
    n_samples=1000, n_features=20, n_informative=10,
    n_redundant=10, n_classes=3, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit an Intel-accelerated forest on the training split
clf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42, n_jobs=-1)
clf.fit(X_train, y_train)

# Class predictions, per-class probabilities, and held-out accuracy
y_pred = clf.predict(X_test)
y_proba = clf.predict_proba(X_test)
accuracy = clf.score(X_test, y_test)

print(f"Accuracy: {accuracy:.3f}")
print(f"Number of trees: {len(clf.estimators_)}")
print(f"Feature importances shape: {clf.feature_importances_.shape}")

# Report the five most important features, highest first
importances = clf.feature_importances_
top_features = np.argsort(importances)[-5:][::-1]
print(f"Top 5 features: {top_features}")

Random Forest Regression

import numpy as np
from sklearnex.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

# Synthetic regression data with a small amount of noise
X, y = make_regression(
    n_samples=1000, n_features=15, noise=0.1, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train with out-of-bag scoring enabled for a free validation estimate
regressor = RandomForestRegressor(
    n_estimators=100, max_depth=15, min_samples_split=5,
    random_state=42, oob_score=True
)
regressor.fit(X_train, y_train)

# Held-out predictions plus the R² and out-of-bag scores
y_pred = regressor.predict(X_test)
r2_score = regressor.score(X_test, y_test)
oob_score = regressor.oob_score_

print(f"R² Score: {r2_score:.3f}")
print(f"Out-of-bag Score: {oob_score:.3f}")
print(f"Feature importances sum: {regressor.feature_importances_.sum():.3f}")

Comparing Ensemble Methods

import time
import numpy as np
from sklearnex.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score

# Generate a moderately sized 4-class dataset
X, y = make_classification(
    n_samples=2000, n_features=30, n_informative=15,
    n_classes=4, random_state=42
)

# Compare Random Forest vs Extra Trees
models = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Extra Trees': ExtraTreesClassifier(n_estimators=100, random_state=42)
}

for name, model in models.items():
    start_time = time.time()
    
    # 5-fold cross-validation (fits and scores the model once per fold)
    scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
    
    # FIX: this interval covers the whole 5-fold CV loop, not a single
    # model fit — the old "Training Time" label was misleading.
    cv_time = time.time() - start_time
    
    print(f"{name}:")
    print(f"  Mean CV Accuracy: {scores.mean():.3f} (+/- {scores.std() * 2:.3f})")
    print(f"  Cross-validation Time (5 folds): {cv_time:.2f} seconds")
    
    # Refit on the full dataset for feature importance analysis
    model.fit(X, y)
    print(f"  Feature Importance Range: {model.feature_importances_.min():.4f} - {model.feature_importances_.max():.4f}")
    print()

Performance Comparison with Standard Scikit-learn

import time
import numpy as np
from sklearn.datasets import make_classification

# Large synthetic multi-class problem for timing comparison
X, y = make_classification(
    n_samples=10000, n_features=50, n_informative=25,
    n_classes=5, random_state=42
)

# Intel-optimized Random Forest
from sklearnex.ensemble import RandomForestClassifier as IntelRF

t0 = time.time()
intel_model = IntelRF(n_estimators=100, random_state=42, n_jobs=-1)
intel_model.fit(X, y)
intel_elapsed = time.time() - t0
intel_acc = intel_model.score(X, y)

print(f"Intel Random Forest:")
print(f"  Training Time: {intel_elapsed:.2f} seconds")
print(f"  Accuracy: {intel_acc:.3f}")

# Stock scikit-learn Random Forest as the reference implementation
from sklearn.ensemble import RandomForestClassifier as StandardRF

t0 = time.time()
stock_model = StandardRF(n_estimators=100, random_state=42, n_jobs=-1)
stock_model.fit(X, y)
stock_elapsed = time.time() - t0
stock_acc = stock_model.score(X, y)

print(f"\nStandard Random Forest:")
print(f"  Training Time: {stock_elapsed:.2f} seconds")
print(f"  Accuracy: {stock_acc:.3f}")
print(f"  Speedup: {stock_elapsed / intel_elapsed:.1f}x")

Performance Notes

  • Significant speedups on datasets with >1000 samples and >10 features
  • Tree construction is highly optimized with Intel acceleration
  • Parallel processing scales well with available CPU cores
  • Memory usage comparable to standard scikit-learn implementations
  • Feature importance calculations are accelerated
  • Out-of-bag scoring benefits from optimization when enabled

Install with Tessl CLI

npx tessl i tessl/pypi-scikit-learn-intelex

docs

advanced.md

clustering.md

daal4py-mb.md

decomposition.md

ensemble.md

index.md

linear-models.md

metrics-model-selection.md

neighbors.md

patching-config.md

stats-manifold.md

svm.md

tile.json