Machine Learning Library Extensions providing essential tools for day-to-day data science tasks
---

Ensemble regression methods including stacking for improved prediction performance, plus linear regression with gradient descent optimization and configurable learning parameters.
import numpy as np


class LinearRegression:
    """Linear regression fitted by (mini-batch) gradient descent.

    Parameters
    ----------
    eta : float, default 0.01
        Learning rate.
    epochs : int, default 50
        Number of passes over the training data.
    minibatches : int or None, default None
        Number of minibatches per epoch for stochastic gradient descent;
        None performs full-batch gradient descent.
    random_seed : int or None, default None
        Seed for the RNG that shuffles samples between SGD epochs.
    print_progress : int, default 0
        Print the cost every `print_progress` epochs (0 disables output).

    Attributes
    ----------
    w_ : ndarray of shape (n_features,)
        Fitted weights/coefficients.
    b_ : float
        Fitted bias/intercept.
    cost_ : list of float
        Training cost history, one entry per epoch.
    """

    def __init__(self, eta=0.01, epochs=50, minibatches=None, random_seed=None,
                 print_progress=0):
        self.eta = eta
        self.epochs = epochs
        self.minibatches = minibatches
        self.random_seed = random_seed
        self.print_progress = print_progress

    def _as_2d(self, X):
        # Accept 1-D inputs for the common single-feature case.
        X = np.asarray(X, dtype=float)
        return X.reshape(-1, 1) if X.ndim == 1 else X

    def fit(self, X, y):
        """Fit the model on X (n_samples, n_features) and y (n_samples).

        Returns
        -------
        self : fitted estimator (allows method chaining).
        """
        X = self._as_2d(X)
        y = np.asarray(y, dtype=float).ravel()
        n_samples, n_features = X.shape
        rng = np.random.RandomState(self.random_seed)

        # Zero-initialized parameters keep full-batch runs deterministic.
        self.w_ = np.zeros(n_features)
        self.b_ = 0.0
        self.cost_ = []

        n_batches = 1 if self.minibatches is None else max(1, int(self.minibatches))
        for epoch in range(1, self.epochs + 1):
            order = np.arange(n_samples)
            if n_batches > 1:
                rng.shuffle(order)  # fresh sample order each SGD epoch
            for batch in np.array_split(order, n_batches):
                errors = (X[batch].dot(self.w_) + self.b_) - y[batch]
                # Mean-gradient update for the squared-error loss.
                self.w_ -= self.eta * X[batch].T.dot(errors) / batch.size
                self.b_ -= self.eta * errors.mean()
            residuals = (X.dot(self.w_) + self.b_) - y
            cost = float((residuals ** 2).sum() / (2.0 * n_samples))
            self.cost_.append(cost)
            if self.print_progress and epoch % self.print_progress == 0:
                print(f"Epoch {epoch}/{self.epochs} | Cost: {cost:.6f}")
        return self

    def predict(self, X):
        """Return predicted values for the feature matrix X."""
        return self._as_2d(X).dot(self.w_) + self.b_
cost_: # Training cost history

Meta-learning ensemble regressor that combines multiple base regressors using a meta-regressor.
import numpy as np


class StackingRegressor:
    """Meta-learning ensemble: base regressors feed a meta-regressor.

    Parameters
    ----------
    regressors : list
        Base regression models implementing `fit` and `predict`.
    meta_regressor : object
        Regressor trained on the base models' predictions.
    verbose : int, default 0
        Verbosity level (>0 prints fitting progress).
    use_features_in_secondary : bool, default False
        If True, the meta-regressor also sees the original features
        alongside the base models' predictions.
    """

    def __init__(self, regressors, meta_regressor, verbose=0,
                 use_features_in_secondary=False):
        self.regressors = regressors
        self.meta_regressor = meta_regressor
        self.verbose = verbose
        self.use_features_in_secondary = use_features_in_secondary

    def _meta_features(self, X):
        # One column of predictions per base regressor, optionally
        # prepended with the original feature matrix.
        preds = np.column_stack(
            [np.asarray(reg.predict(X)).ravel() for reg in self.regressors])
        if self.use_features_in_secondary:
            preds = np.hstack((np.asarray(X, dtype=float), preds))
        return preds

    def fit(self, X, y):
        """Fit every base regressor on (X, y), then fit the meta-regressor
        on their in-sample predictions.

        Returns
        -------
        self : fitted estimator.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        for idx, reg in enumerate(self.regressors, start=1):
            if self.verbose:
                print(f"Fitting regressor {idx}/{len(self.regressors)}")
            reg.fit(X, y)
        self.meta_regressor.fit(self._meta_features(X), y)
        return self

    def predict(self, X):
        """Predict with the meta-regressor on the base models' outputs."""
        return self.meta_regressor.predict(self._meta_features(np.asarray(X)))

    def get_params(self, deep=True):
        """Return constructor parameters (scikit-learn convention)."""
        return {
            'regressors': self.regressors,
            'meta_regressor': self.meta_regressor,
            'verbose': self.verbose,
            'use_features_in_secondary': self.use_features_in_secondary,
        }

    def set_params(self, **params):
        """Set constructor parameters by keyword; returns self.

        Raises ValueError on unknown parameter names so typos fail loudly.
        """
        for key, value in params.items():
            if key not in self.get_params():
                raise ValueError(f"Invalid parameter: {key}")
            setattr(self, key, value)
        return self
"""Set parameters for this estimator"""Cross-validation stacking regressor that uses cross-validation to generate meta-features.
import copy

import numpy as np


class StackingCVRegressor:
    """Stacking ensemble whose meta-features come from cross-validation.

    Each base regressor's out-of-fold predictions are used to train the
    meta-regressor, reducing the information leakage of plain stacking.

    Parameters
    ----------
    regressors : list
        Base regression models implementing `fit` and `predict`.
    meta_regressor : object
        Regressor trained on the out-of-fold meta-features.
    cv : int, default 2
        Number of cross-validation folds.
    shuffle : bool, default True
        Shuffle the samples before splitting into folds.
    random_state : int, default 0
        Seed for the shuffling RNG.
    verbose : int, default 0
        Verbosity level (>0 prints fitting progress).
    use_features_in_secondary : bool, default False
        If True, the original features are passed to the meta-regressor
        alongside the meta-features.
    store_train_meta_features : bool, default False
        If True, the out-of-fold meta-features are kept in
        `train_meta_features_` after fitting.
    use_clones : bool, default True
        Fit deep copies of the supplied estimators instead of mutating them.
    n_jobs : int, default 1
        Accepted for API compatibility; fitting runs sequentially here.
    """

    def __init__(self, regressors, meta_regressor, cv=2, shuffle=True,
                 random_state=0, verbose=0, use_features_in_secondary=False,
                 store_train_meta_features=False, use_clones=True, n_jobs=1):
        self.regressors = regressors
        self.meta_regressor = meta_regressor
        self.cv = cv
        self.shuffle = shuffle
        self.random_state = random_state
        self.verbose = verbose
        self.use_features_in_secondary = use_features_in_secondary
        self.store_train_meta_features = store_train_meta_features
        self.use_clones = use_clones
        self.n_jobs = n_jobs

    def _maybe_clone(self, estimator):
        # deepcopy stands in for sklearn.clone so no extra dependency is needed.
        return copy.deepcopy(estimator) if self.use_clones else estimator

    def fit(self, X, y, groups=None):
        """Fit base regressors with cv-fold out-of-fold predictions, then fit
        the meta-regressor on those meta-features.

        `groups` is accepted for API compatibility and currently ignored.

        Returns
        -------
        self : fitted estimator.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        n_samples = X.shape[0]

        order = np.arange(n_samples)
        if self.shuffle:
            np.random.RandomState(self.random_state).shuffle(order)
        folds = np.array_split(order, self.cv)

        # Out-of-fold predictions: each sample is predicted only by models
        # that never saw it during this round of training.
        oof = np.zeros((n_samples, len(self.regressors)))
        for fold in folds:
            train_idx = np.concatenate([f for f in folds if f is not fold])
            for col, reg in enumerate(self.regressors):
                model = self._maybe_clone(reg)
                model.fit(X[train_idx], y[train_idx])
                oof[fold, col] = np.asarray(model.predict(X[fold])).ravel()

        if self.store_train_meta_features:
            self.train_meta_features_ = oof

        # Refit each base regressor on the full training set for inference.
        self.regr_ = []
        for idx, reg in enumerate(self.regressors, start=1):
            if self.verbose:
                print(f"Fitting regressor {idx}/{len(self.regressors)}")
            model = self._maybe_clone(reg)
            model.fit(X, y)
            self.regr_.append(model)

        meta_X = np.hstack((X, oof)) if self.use_features_in_secondary else oof
        self.meta_regr_ = self._maybe_clone(self.meta_regressor)
        self.meta_regr_.fit(meta_X, y)
        return self

    def predict(self, X):
        """Predict with the meta-regressor on the base models' outputs."""
        X = np.asarray(X, dtype=float)
        meta = self.predict_meta_features(X)
        if self.use_features_in_secondary:
            meta = np.hstack((X, meta))
        return self.meta_regr_.predict(meta)

    def predict_meta_features(self, X):
        """Return the fitted base regressors' predictions, one column each."""
        return np.column_stack(
            [np.asarray(reg.predict(X)).ravel() for reg in self.regr_])
"""Generate meta-features from base regressors"""from mlxtend.regressor import LinearRegression
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np

# Build a noisy single-feature regression problem and hold out 30% of it.
X, y = make_regression(n_samples=100, n_features=1, noise=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42)

# Fit the gradient-descent linear regressor, logging cost every 10 epochs.
model = LinearRegression(eta=0.01, epochs=100, print_progress=10)
model.fit(X_train, y_train)

# Score the held-out split.
y_pred = model.predict(X_test)

# Visualize true targets vs. predictions, with the fitted line dashed.
plt.figure(figsize=(10, 6))
plt.scatter(X_test, y_test, alpha=0.6, label='True values')
plt.scatter(X_test, y_pred, alpha=0.6, label='Predictions')
plt.plot(X_test, y_pred, 'r--', alpha=0.8)
plt.xlabel('Feature')
plt.ylabel('Target')
plt.title('Linear Regression Results')
plt.legend()
plt.show()

# Second figure: the cost recorded after every training epoch.
plt.figure(figsize=(8, 6))
plt.plot(model.cost_)
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.title('Training Cost History')
plt.show()

from mlxtend.regressor import StackingCVRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression as SklearnLR
from sklearn.svm import SVR
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np

# Ten-feature synthetic regression task with a 70/30 train/test split.
X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42)

# Three heterogeneous base learners for the ensemble.
forest = RandomForestRegressor(n_estimators=50, random_state=42)
svm_model = SVR(kernel='rbf', C=0.1, gamma=0.1)
ols = SklearnLR()

# The meta-learner combines the base models' cross-validated predictions,
# here together with the original features.
meta_model = LinearRegression()
stack = StackingCVRegressor(
    regressors=[forest, svm_model, ols],
    meta_regressor=meta_model,
    cv=5,
    use_features_in_secondary=True,
)

# Train the stacked ensemble and predict on the held-out data.
stack.fit(X_train, y_train)
stack_preds = stack.predict(X_test)

# Baseline for comparison: the random forest on its own.
forest.fit(X_train, y_train)
forest_preds = forest.predict(X_test)

# Mean squared error for the stack vs. the single model.
mse_stacking = mean_squared_error(y_test, stack_preds)
mse_rf = mean_squared_error(y_test, forest_preds)
print(f"Stacking Regressor MSE: {mse_stacking:.4f}")
print(f"Random Forest MSE: {mse_rf:.4f}")
print(f"Improvement: {((mse_rf - mse_stacking) / mse_rf * 100):.2f}%")from mlxtend.regressor import StackingRegressor, StackingCVRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import Ridge
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score
import numpy as np

# Medium-sized synthetic problem for comparing stacking variants.
X, y = make_regression(n_samples=500, n_features=8, noise=0.1, random_state=42)

# Shared pool of base learners plus a ridge meta-learner.
base_regressors = [
    RandomForestRegressor(n_estimators=30, random_state=42),
    GradientBoostingRegressor(n_estimators=30, random_state=42),
    Ridge(alpha=1.0),
]
meta_regressor = Ridge(alpha=0.1)

# Three stacking strategies built from the same components.
regressors = {
    'Regular Stacking': StackingRegressor(base_regressors, meta_regressor),
    'CV Stacking': StackingCVRegressor(base_regressors, meta_regressor, cv=5),
    'CV Stacking + Features': StackingCVRegressor(
        base_regressors, meta_regressor, cv=5, use_features_in_secondary=True
    ),
}

# Report 5-fold cross-validated RMSE for each strategy.
for name, regressor in regressors.items():
    neg_mse = cross_val_score(regressor, X, y, cv=5,
                              scoring='neg_mean_squared_error')
    rmse_scores = np.sqrt(-neg_mse)
    print(f"{name}:")
print(f" RMSE: {rmse_scores.mean():.4f} (+/- {rmse_scores.std() * 2:.4f})")Install with Tessl CLI
npx tessl i tessl/pypi-mlxtend