A suite of visual analysis and diagnostic tools for machine learning.
npx @tessl/cli install tessl/pypi-yellowbrick@1.5.0

A comprehensive machine learning visualization library that extends scikit-learn with publication-quality visualizations for machine learning model evaluation, selection, and interpretation. Yellowbrick provides visual diagnostic tools called "Visualizers" that combine scikit-learn with matplotlib to streamline the machine learning workflow from data exploration through model interpretation.
pip install yellowbrick

import yellowbrick

Direct imports from yellowbrick:
from yellowbrick import ROCAUC, ClassBalance, ClassificationScoreVisualizer
from yellowbrick import anscombe, datasaurus
from yellowbrick import set_aesthetic, set_style, set_palette, color_palette

Common pattern for visualizers:
from yellowbrick.classifier import ROCAUC, ConfusionMatrix
from yellowbrick.regressor import ResidualsPlot
from yellowbrick.cluster import KElbow

Functional API imports:
from yellowbrick.classifier import roc_auc, confusion_matrix
from yellowbrick.regressor import residuals_plot

from yellowbrick.classifier import ROCAUC
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification
# Generate sample data: 1000 samples, 20 features, 2 classes
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2)
# Hold out 20% of the samples as the test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# Create the model (no model.fit() call appears here; fitting goes through
# the visualizer below)
model = LogisticRegression()
# Visualize ROC/AUC curves
visualizer = ROCAUC(model, classes=['Class 0', 'Class 1'])
visualizer.fit(X_train, y_train)  # fit on the training split
visualizer.score(X_test, y_test)  # score on the held-out test split
visualizer.show()  # show the visualization
# Using functional API: the same inputs passed in a single call
from yellowbrick.classifier import roc_auc
roc_auc(model, X_train, y_train, X_test, y_test, classes=['Class 0', 'Class 1'])

Yellowbrick follows the scikit-learn API design with Visualizers that inherit from sklearn.base.BaseEstimator:
- Visualizer, ModelVisualizer, and ScoreVisualizer provide the foundation
- Visualizers expose fit(), score(), and show() methods

Comprehensive visualizers for evaluating classification models including ROC curves, confusion matrices, classification reports, class prediction errors, precision-recall curves, and discrimination thresholds.
class ROCAUC(ClassificationScoreVisualizer):
    """ROC/AUC curve visualizer for a classification estimator.

    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, estimator, ax=None, micro=True, macro=True, per_class=True, binary=False, classes=None, encoder=None, is_fitted="auto", force_model=False, **kwargs): ...
    def fit(self, X, y, **kwargs): ...
    def score(self, X, y, **kwargs): ...
class ConfusionMatrix(ClassificationScoreVisualizer):
    """Confusion matrix visualizer for a classification estimator.

    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, estimator, ax=None, sample_weight=None, percent=False, classes=None, encoder=None, cmap="YlOrRd", fontsize=None, is_fitted="auto", force_model=False, **kwargs): ...
    def fit(self, X, y, **kwargs): ...
    def score(self, X, y, **kwargs): ...
class ClassificationReport(ClassificationScoreVisualizer):
    """Classification report visualizer for a classification estimator.

    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, estimator, classes=None, **kwargs): ...
    def fit(self, X, y, **kwargs): ...
    def score(self, X, y, **kwargs): ...
# Functional APIs for the classification visualizers (signature stubs):
# each takes the estimator and training data, with optional test data.
def roc_auc(estimator, X_train, y_train, X_test=None, y_test=None, **kwargs): ...
def confusion_matrix(estimator, X_train, y_train, X_test=None, y_test=None, **kwargs): ...
def classification_report(estimator, X_train, y_train, X_test=None, y_test=None, **kwargs): ...

Diagnostic visualizers for regression models including residuals plots, prediction error plots, alpha selection for regularized models, and Cook's distance for influence analysis.
class ResidualsPlot(RegressionScoreVisualizer):
    """Residuals plot visualizer for a regression estimator.

    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, estimator, **kwargs): ...
    def fit(self, X, y, **kwargs): ...
    def score(self, X, y, **kwargs): ...
class PredictionError(RegressionScoreVisualizer):
    """Prediction error plot visualizer for a regression estimator.

    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, estimator, **kwargs): ...
    def fit(self, X, y, **kwargs): ...
    def score(self, X, y, **kwargs): ...
class AlphaSelection(RegressionScoreVisualizer):
    """Alpha selection visualizer for regularized regression models.

    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, estimator, **kwargs): ...
    def fit(self, X, y, **kwargs): ...
    def score(self, X, y, **kwargs): ...
# Functional APIs for the regression visualizers (signature stubs):
# each takes the estimator and training data, with optional test data.
def residuals_plot(estimator, X_train, y_train, X_test=None, y_test=None, **kwargs): ...
def prediction_error(estimator, X_train, y_train, X_test=None, y_test=None, **kwargs): ...

Visualizers for clustering evaluation including elbow method for optimal K selection, silhouette analysis, and intercluster distance mapping.
class KElbow(ClusteringScoreVisualizer):
    """Elbow-method visualizer for selecting K in a clustering estimator.

    ``k`` defaults to 10 and ``metric`` to ``'distortion'``.
    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, estimator, k=10, metric='distortion', **kwargs): ...
    def fit(self, X, y=None, **kwargs): ...
class SilhouetteVisualizer(ClusteringScoreVisualizer):
    """Silhouette analysis visualizer for a clustering estimator.

    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, estimator, **kwargs): ...
    def fit(self, X, y=None, **kwargs): ...
class InterclusterDistance(ClusteringScoreVisualizer):
    """Intercluster distance map visualizer for a clustering estimator.

    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, estimator, **kwargs): ...
    def fit(self, X, y=None, **kwargs): ...
# Functional APIs for the clustering visualizers (signature stubs):
# each takes the estimator and the data X; no target argument is listed.
def kelbow_visualizer(estimator, X, k=10, **kwargs): ...
def silhouette_visualizer(estimator, X, **kwargs): ...

Tools for feature selection, analysis, and visualization including feature ranking, correlation analysis, PCA decomposition, manifold learning, and parallel coordinates.
class Rank1D(Visualizer):
    """Feature-ranking visualizer; ``algorithm`` defaults to ``'shapiro'``.

    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, algorithm='shapiro', **kwargs): ...
    def fit(self, X, y=None, **kwargs): ...
class Rank2D(Visualizer):
    """Feature-ranking visualizer; ``algorithm`` defaults to ``'pearson'``.

    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, algorithm='pearson', **kwargs): ...
    def fit(self, X, y=None, **kwargs): ...
class PCA(Visualizer):
    """PCA decomposition visualizer.

    ``scale`` and ``proj_features`` both default to ``True``.
    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, scale=True, proj_features=True, **kwargs): ...
    def fit(self, X, y=None, **kwargs): ...
class ParallelCoordinates(Visualizer):
    """Parallel coordinates visualizer for features.

    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, classes=None, **kwargs): ...
    def fit(self, X, y=None, **kwargs): ...
# Functional APIs for the feature visualizers (signature stubs):
# each takes the data X and an optional target y; no estimator is required.
def rank1d(X, y=None, algorithm='shapiro', **kwargs): ...
def rank2d(X, y=None, algorithm='pearson', **kwargs): ...
def pca_decomposition(X, y=None, **kwargs): ...

Visualizers for model selection and hyperparameter tuning including learning curves, validation curves, cross-validation scores, and feature importance analysis.
class LearningCurve(ModelVisualizer):
    """Learning curve visualizer for an estimator.

    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, estimator, **kwargs): ...
    def fit(self, X, y, **kwargs): ...
class ValidationCurve(ModelVisualizer):
    """Validation curve visualizer for an estimator.

    ``param_name`` and ``param_range`` are required constructor arguments.
    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, estimator, param_name, param_range, **kwargs): ...
    def fit(self, X, y, **kwargs): ...
class FeatureImportances(ModelVisualizer):
    """Feature importance visualizer for an estimator.

    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, estimator, **kwargs): ...
    def fit(self, X, y, **kwargs): ...
class CVScores(ModelVisualizer):
    """Cross-validation scores visualizer for an estimator.

    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, estimator, **kwargs): ...
    def fit(self, X, y, **kwargs): ...
# Functional APIs for the model selection visualizers (signature stubs):
# each takes the estimator together with X and y.
def learning_curve(estimator, X, y, **kwargs): ...
def validation_curve(estimator, X, y, param_name, param_range, **kwargs): ...
def feature_importances(estimator, X, y, **kwargs): ...

Specialized visualizers for text analysis and natural language processing including t-SNE/UMAP embeddings, frequency distributions, part-of-speech analysis, and word correlation plots.
class TSNEVisualizer(Visualizer):
    """t-SNE embedding visualizer for text analysis.

    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, **kwargs): ...
    def fit(self, X, y=None, **kwargs): ...
class FreqDistVisualizer(Visualizer):
    """Frequency distribution visualizer fitted on a corpus.

    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, **kwargs): ...
    def fit(self, corpus, **kwargs): ...
class DispersionPlot(Visualizer):
    """Text visualizer fitted on a corpus.

    NOTE(review): presumably plots the dispersion of terms across the
    corpus -- confirm against the library documentation.
    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, **kwargs): ...
    def fit(self, corpus, **kwargs): ...
# Functional APIs for the text visualizers (signature stubs):
# tsne takes X with an optional y; freqdist takes a corpus.
def tsne(X, y=None, **kwargs): ...
def freqdist(corpus, **kwargs): ...
def dispersion(corpus, **kwargs): ...

Built-in datasets for learning and testing, plus utility functions for data management and visualization styling.
# Dataset loaders for the built-in datasets (signature stubs; every loader
# is listed without parameters and its body is elided).
def load_concrete(): ...
def load_energy(): ...
def load_credit(): ...
def load_occupancy(): ...
def load_mushroom(): ...
def load_hobbies(): ...
def load_bikeshare(): ...
# Style management (signature stubs; bodies elided).
# NOTE(review): the defaults shown here are as listed in this page --
# confirm them against the library's style module before relying on them.
def set_aesthetic(aesthetic='whitegrid'): ...
def set_palette(palette='flatui'): ...
def color_palette(palette=None): ...
# Demo functions (signature stubs; listed without parameters, bodies elided)
def anscombe(): ...
def datasaurus(): ...

from enum import Enum
class TargetType(Enum):
    """Enumeration of target types.

    Each member's value is the lowercase form of its name.
    """

    AUTO = "auto"
    SINGLE = "single"
    DISCRETE = "discrete"
    CONTINUOUS = "continuous"
    UNKNOWN = "unknown"
# Base visualizer classes
class Visualizer:
    """Base visualizer class.

    The constructor accepts optional ``ax``, ``fig``, ``size``, ``color``
    and ``title`` arguments plus arbitrary keyword arguments.
    API signature stub: method bodies are elided with ``...``.
    """

    def __init__(self, ax=None, fig=None, size=None, color=None, title=None, **kwargs): ...
    def fit(self, X, y=None, **kwargs): ...
    def transform(self, X): ...
    def show(self, outpath=None, **kwargs): ...
    def finalize(self, **kwargs): ...
class ModelVisualizer(Visualizer):
    """Visualizer that wraps an estimator.

    ``estimator`` is the required first argument; ``is_fitted`` defaults
    to ``"auto"``.
    API signature stub: the constructor body is elided with ``...``.
    """

    def __init__(self, estimator, ax=None, fig=None, is_fitted="auto", **kwargs): ...
class ScoreVisualizer(ModelVisualizer):
    """Model visualizer that adds a ``score(X, y)`` method.

    API signature stub: the method body is elided with ``...``.
    """

    def score(self, X, y, **kwargs): ...