A suite of visual analysis and diagnostic tools for machine learning.
Diagnostic visualizers for regression model evaluation, providing insights into prediction accuracy, residual patterns, model assumptions, and outlier detection. These tools help assess regression model performance and guide model improvement.
Residuals plots for evaluating regression model assumptions, detecting heteroscedasticity, non-linearity, and outliers. Essential for validating linear regression assumptions and identifying model inadequacies.
class ResidualsPlot(RegressionScoreVisualizer):
    """
    Residuals plot visualizer for regression models.

    Plots the residuals of a regression model to help detect
    heteroscedasticity, non-linearity, and outliers, and to validate
    linear regression assumptions.

    Parameters
    ----------
    estimator : scikit-learn regressor
        The regression model to evaluate.
    hist : bool, default=True
        Whether to draw a histogram of the residuals.
    qqplot : bool, default=False
        Whether to draw a Q-Q plot of the residuals.
    """

    def __init__(self, estimator, hist=True, qqplot=False, **kwargs): ...

    def fit(self, X, y, **kwargs): ...

    def score(self, X, y, **kwargs): ...

    def show(self, **kwargs): ...
def residuals_plot(estimator, X_train, y_train, X_test=None, y_test=None, **kwargs):
    """
    Functional API for residuals plot visualization.

    Parameters
    ----------
    estimator : scikit-learn regressor
        The regression model to evaluate.
    X_train : array-like
        Training features.
    y_train : array-like
        Training target values.
    X_test : array-like, optional
        Test features.
    y_test : array-like, optional
        Test target values.

    Returns
    -------
    ResidualsPlot
        The visualizer instance.
    """

# Usage Example:
from yellowbrick.regressor import ResidualsPlot, residuals_plot
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Class-based API
model = LinearRegression()
visualizer = ResidualsPlot(model, hist=True, qqplot=True)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()

# Functional API
residuals_plot(model, X_train, y_train, X_test, y_test, hist=True)

# Prediction error plots showing the relationship between predicted and
# actual values, helping assess overall model accuracy and identify
# systematic prediction errors.
class PredictionError(RegressionScoreVisualizer):
    """
    Prediction error visualizer for regression models.

    Plots predicted against actual values to assess overall model
    accuracy and identify systematic prediction errors.

    Parameters
    ----------
    estimator : scikit-learn regressor
        The regression model to evaluate.
    identity : bool, default=True
        Whether to draw the identity line (perfect prediction).
    bestfit : bool, default=True
        Whether to draw a best fit line through the predictions.
    alpha : float, default=0.75
        Transparency of the scatter points.
    """

    def __init__(self, estimator, identity=True, bestfit=True, alpha=0.75, **kwargs): ...

    def fit(self, X, y, **kwargs): ...

    def score(self, X, y, **kwargs): ...

    def show(self, **kwargs): ...
def prediction_error(estimator, X_train, y_train, X_test=None, y_test=None, **kwargs):
    """
    Functional API for prediction error visualization.

    Parameters
    ----------
    estimator : scikit-learn regressor
        The regression model to evaluate.
    X_train : array-like
        Training features.
    y_train : array-like
        Training target values.
    X_test : array-like, optional
        Test features.
    y_test : array-like, optional
        Test target values.

    Returns
    -------
    PredictionError
        The visualizer instance.
    """

# Regularization parameter (alpha) selection for regularized regression
# models like Ridge, Lasso, and Elastic Net. Helps find optimal
# regularization strength through cross-validation.
class AlphaSelection(RegressionScoreVisualizer):
    """
    Alpha selection visualizer for regularized regression models.

    Helps choose the regularization strength (alpha) for models such as
    Ridge, Lasso, and ElasticNet through cross-validation.

    Parameters
    ----------
    estimator : scikit-learn regularized regressor (Ridge, Lasso, etc.)
        The model whose alpha parameter is being selected.
    alphas : array-like, optional
        Alpha values to test.
    cv : int or cross-validation generator, optional
        Cross-validation strategy.
    scoring : str, default='neg_mean_squared_error'
        Scoring metric for evaluation.
    normalize_error_bars : bool, default=False
        Whether to normalize the error bars.
    """

    def __init__(
        self,
        estimator,
        alphas=None,
        cv=None,
        scoring='neg_mean_squared_error',
        normalize_error_bars=False,
        **kwargs,
    ): ...

    def fit(self, X, y, **kwargs): ...

    def show(self, **kwargs): ...
class ManualAlphaSelection(RegressionScoreVisualizer):
    """
    Manual alpha selection visualizer with user-specified alpha values.

    Parameters
    ----------
    estimator : scikit-learn regularized regressor
        The model whose alpha parameter is being evaluated.
    alphas : array-like
        Specific alpha values to evaluate (required, no default).
    cv : int or cross-validation generator, optional
        Cross-validation strategy.
    scoring : str, default='neg_mean_squared_error'
        Scoring metric.
    """

    def __init__(self, estimator, alphas, cv=None, scoring='neg_mean_squared_error', **kwargs): ...

    def fit(self, X, y, **kwargs): ...

    def show(self, **kwargs): ...

# Usage Example:
from yellowbrick.regressor import AlphaSelection
from sklearn.linear_model import Ridge
import numpy as np

# Alpha selection for Ridge regression
alphas = np.logspace(-3, 3, 50)
model = Ridge()
alpha_viz = AlphaSelection(model, alphas=alphas, cv=5)
alpha_viz.fit(X, y)
alpha_viz.show()

# Get optimal alpha
optimal_alpha = alpha_viz.alpha_

# Cook's distance analysis for identifying influential observations that
# disproportionately affect regression model parameters. Helps detect
# outliers and leverage points.
class CooksDistance(RegressionScoreVisualizer):
    """
    Cook's distance visualizer for influence analysis.

    Identifies observations that disproportionately affect regression
    model parameters (outliers and leverage points).

    Parameters
    ----------
    estimator : scikit-learn regressor
        The regression model to evaluate.
    draw_threshold : bool, default=True
        Whether to draw the influence threshold line.
    linefmt : str, default='r--'
        Format string for the threshold line.
    """

    def __init__(self, estimator, draw_threshold=True, linefmt='r--', **kwargs): ...

    def fit(self, X, y, **kwargs): ...

    def show(self, **kwargs): ...

# Usage Example:
from yellowbrick.regressor import CooksDistance
from sklearn.linear_model import LinearRegression

# Analyze influential observations
model = LinearRegression()
cooks_viz = CooksDistance(model)
cooks_viz.fit(X, y)
cooks_viz.show()

# Access Cook's distance values
distances = cooks_viz.distance_
influential_points = cooks_viz.outliers_


class RegressionScoreVisualizer(ScoreVisualizer):
    """
    Base class for regression scoring visualizers.

    Provides common functionality for regression model evaluation.
    """

    def __init__(self, estimator, **kwargs): ...

    def fit(self, X, y, **kwargs): ...

    def score(self, X, y, **kwargs): ...


from yellowbrick.regressor import ResidualsPlot, PredictionError, CooksDistance
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Prepare data and model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = LinearRegression()

# Residuals analysis - check assumptions
residuals_viz = ResidualsPlot(model, hist=True, qqplot=True)
residuals_viz.fit(X_train, y_train)
residuals_viz.score(X_test, y_test)
residuals_viz.show()

# Prediction accuracy assessment
pred_error_viz = PredictionError(model)
pred_error_viz.fit(X_train, y_train)
pred_error_viz.score(X_test, y_test)
pred_error_viz.show()

# Influence analysis
cooks_viz = CooksDistance(model)
cooks_viz.fit(X_train, y_train)
cooks_viz.show()


from yellowbrick.regressor import AlphaSelection
from sklearn.linear_model import Ridge, Lasso, ElasticNet
import numpy as np

# Ridge regression alpha selection
ridge_alphas = np.logspace(-3, 3, 50)
ridge_model = Ridge()
ridge_alpha_viz = AlphaSelection(ridge_model, alphas=ridge_alphas, cv=10)
ridge_alpha_viz.fit(X, y)
ridge_alpha_viz.show()

# Lasso regression alpha selection
lasso_alphas = np.logspace(-4, 1, 50)
lasso_model = Lasso()
lasso_alpha_viz = AlphaSelection(lasso_model, alphas=lasso_alphas, cv=10)
lasso_alpha_viz.fit(X, y)
lasso_alpha_viz.show()

# ElasticNet alpha selection
elastic_alphas = np.logspace(-4, 1, 20)
elastic_model = ElasticNet(l1_ratio=0.5)
elastic_alpha_viz = AlphaSelection(elastic_model, alphas=elastic_alphas, cv=10)
elastic_alpha_viz.fit(X, y)
elastic_alpha_viz.show()


from yellowbrick.regressor import PredictionError
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt

# Compare multiple regression models
models = {
    'Linear Regression': LinearRegression(),
    'Ridge': Ridge(alpha=1.0),
    'Lasso': Lasso(alpha=0.1),
    'Random Forest': RandomForestRegressor(n_estimators=100),
}

fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.ravel()
for idx, (name, model) in enumerate(models.items()):
    viz = PredictionError(model, ax=axes[idx])
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)
    viz.finalize()

plt.tight_layout()
plt.show()


from yellowbrick.regressor import ResidualsPlot
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

# Create pipeline with preprocessing
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('regressor', LinearRegression()),
])

# Visualize pipeline results
viz = ResidualsPlot(pipeline)
viz.fit(X_train, y_train)
viz.score(X_test, y_test)
viz.show()


from yellowbrick.regressor import ResidualsPlot
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Create comprehensive residuals analysis
model = LinearRegression()

# Standard residuals plot
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# Fitted values vs residuals
viz1 = ResidualsPlot(model, ax=axes[0], hist=False, qqplot=False)
viz1.fit(X_train, y_train)
viz1.score(X_test, y_test)
viz1.finalize()

# Histogram of residuals
viz2 = ResidualsPlot(model, ax=axes[1], hist=True, qqplot=False)
viz2.fit(X_train, y_train)
viz2.score(X_test, y_test)
viz2.finalize()

# Q-Q plot of residuals
viz3 = ResidualsPlot(model, ax=axes[2], hist=False, qqplot=True)
viz3.fit(X_train, y_train)
viz3.score(X_test, y_test)
viz3.finalize()

plt.tight_layout()
plt.show()

# Install with Tessl CLI
npx tessl i tessl/pypi-yellowbrick