tessl/pypi-mlxtend

Machine Learning Library Extensions providing essential tools for day-to-day data science tasks

—

Pending

Overview

Eval results

Files

Visualization Tools

Name: tessl/pypi-mlxtend
Author: tessl

Specialized plotting functions for machine learning model analysis including decision regions, learning curves, and confusion matrices. All plotting functions integrate with matplotlib and can be customized using standard matplotlib parameters.

Capabilities

Decision Region Visualization

Visualize decision boundaries and regions for 2D datasets and classifiers.

def plot_decision_regions(X, y, clf, feature_idx=None, filler_feature_values=None,
                         filler_feature_ranges=None, ax=None, X_highlight=None,
                         res=0.02, legend=1, hide_spines=True, markers='s^oxv<>',
                         colors='red,blue,limegreen,gray,cyan', scatter_kwargs=None,
                         contourf_kwargs=None, scatter_highlight_kwargs=None):
    """
    Plot decision regions for 2D feature spaces.

    Parameters:
    - X: array-like, feature matrix (shape: [n_samples, n_features])
    - y: array-like, class labels (shape: [n_samples])
    - clf: sklearn-compatible classifier with predict method
    - feature_idx: list, indices of features to plot (default: [0, 1])
    - filler_feature_values: dict, values for non-plotted features
    - filler_feature_ranges: dict, ranges for non-plotted features
    - ax: matplotlib axis, axis to plot on
    - X_highlight: array-like, samples to highlight
    - res: float, grid resolution
    - legend: int, matplotlib legend location code (0=no legend, 1=upper right, 2=upper left)
    - hide_spines: bool, hide plot spines
    - markers: str, marker symbols for classes, one character per class
    - colors: str, comma-separated color names for classes
    - scatter_kwargs: dict, additional scatter plot parameters
    - contourf_kwargs: dict, additional contour plot parameters
    - scatter_highlight_kwargs: dict, highlight scatter parameters

    Returns:
    - ax: matplotlib axis object

    NOTE(review): the published mlxtend API appears to name the feature
    selector `feature_index`, and newer releases replace `res` with
    `zoom_factor` -- verify against the installed version before relying
    on these keyword names.
    """

Learning Curve Visualization

Plot learning curves to analyze model performance vs training set size or training progress.

def plot_learning_curves(X_train, y_train, X_test, y_test, clf, train_marker='o',
                        test_marker='^', marker_size=7, alpha=0.75, 
                        scoring='misclassification error', suppress_plot=False,
                        print_model=True, style='fivethirtyeight', legend_loc='best'):
    """
    Plot learning curves showing training and test performance.

    Parameters:
    - X_train: array-like, training features
    - y_train: array-like, training labels
    - X_test: array-like, test features
    - y_test: array-like, test labels
    - clf: sklearn-compatible classifier
    - train_marker: str, marker style for training scores
    - test_marker: str, marker style for test scores
    - marker_size: int, size of markers
    - alpha: float, marker transparency
    - scoring: str, scoring metric ('misclassification error', 'accuracy', 'auc')
    - suppress_plot: bool, suppress plot display
    - print_model: bool, print model details
    - style: str, matplotlib style
    - legend_loc: str, legend location

    Returns:
    - train_scores: list, training scores
    - test_scores: list, test scores
      (both presumably expressed in the metric chosen by `scoring`, i.e.
      error rates under the default -- confirm)

    NOTE(review): the published mlxtend signature does not appear to
    include `marker_size` or `alpha`, and it lists scikit-learn metric
    names such as 'roc_auc' rather than 'auc' -- verify against the
    installed version.
    """

Confusion Matrix Visualization

Create visual representations of confusion matrices with customization options.

def plot_confusion_matrix(conf_mat, hide_spines=False, hide_ticks=False,
                         figsize=None, cmap=None, colorbar=False, show_absolute=True,
                         show_normed=False, normed_type='all', class_names=None):
    """
    Plot confusion matrix with customization options.

    Parameters:
    - conf_mat: array-like, confusion matrix
    - hide_spines: bool, hide plot spines
    - hide_ticks: bool, hide axis ticks
    - figsize: tuple, figure size (width, height)
    - cmap: str, colormap name
    - colorbar: bool, show colorbar
    - show_absolute: bool, show absolute counts
    - show_normed: bool, show normalized values
    - normed_type: str, normalization type ('all', 'pred', 'true')
    - class_names: list, class label names

    Returns:
    - fig: matplotlib figure object
    - ax: matplotlib axis object

    NOTE(review): `normed_type` does not appear in the published mlxtend
    signature (which instead offers `norm_colormap`, `figure`, `axis` and
    `fontcolor_threshold`) -- verify against the installed version.
    """

Feature Selection Visualization

Visualize results from sequential feature selection algorithms.

def plot_sequential_feature_selection(metric_dict, kind='std_dev', color='blue',
                                     barchart=False, figsize=None):
    """
    Plot sequential feature selection results.

    Parameters:
    - metric_dict: dict, metrics from SequentialFeatureSelector
      (e.g. the value returned by its get_metric_dict() method)
    - kind: str, plot type ('std_dev', 'std_err', 'ci')
    - color: str, plot color
    - barchart: bool, use bar chart instead of line plot
    - figsize: tuple, figure size

    Returns:
    - fig: matplotlib figure object
    - ax: matplotlib axis object

    NOTE(review): `barchart` does not appear in the published mlxtend
    signature, which also accepts `kind=None` to omit the error band --
    verify against the installed version.
    """

Linear Regression Visualization

Visualize a linear regression fit together with its correlation coefficient.

def plot_linear_regression(X, y, model=None, corr_func='pearsonr', scattercolor='blue',
                          fit_style='k--', legend=True, xlim='auto'):
    """
    Plot linear regression fit with correlation coefficient.

    Parameters:
    - X: array-like, feature values (1D)
    - y: array-like, target values
    - model: sklearn-compatible regressor, fitted model
    - corr_func: str, correlation function ('pearsonr', 'spearmanr')
    - scattercolor: str, scatter plot color
    - fit_style: str, regression line style
    - legend: bool, show legend with correlation
    - xlim: str or tuple, x-axis limits ('auto' or an explicit pair)

    Returns:
    - correlation: float, correlation coefficient

    NOTE(review): the published mlxtend function is documented as
    returning a tuple (intercept, slope, corr_coeff) and as accepting
    either 'pearsonr' or a callable for `corr_func` -- verify against the
    installed version before unpacking the result.
    """

Specialized Plot Types

Various specialized plotting functions for data analysis and visualization.

def category_scatter(x, y, label_col, selection=None, alpha=1.0, markers='o',
                    colors=None, figsize=(7, 5)):
    """
    Create scatter plot with categorical coloring.

    Parameters:
    - x: str or array-like, x-axis data
    - y: str or array-like, y-axis data
    - label_col: str or array-like, categorical labels
    - selection: list, subset of categories to plot
    - alpha: float, point transparency
    - markers: str, marker symbols
    - colors: list, color palette
    - figsize: tuple, figure size

    Returns:
    - fig: matplotlib figure object

    NOTE(review): the published mlxtend signature takes a required `data`
    argument (DataFrame or array) that `x`, `y` and `label_col` index
    into, and does not list `selection` or `figsize` -- verify against the
    installed version.
    """

def heatmap(ary, xlabels=None, ylabels=None, fmt='%.1f', cmap='Blues',
           cbar=True, cbar_kws=None, figsize=None):
    """
    Create heatmap visualization.

    Parameters:
    - ary: array-like, 2D data matrix
    - xlabels: list, x-axis labels
    - ylabels: list, y-axis labels
    - fmt: str, number format string for the cell values
    - cmap: str, colormap name
    - cbar: bool, show colorbar
    - cbar_kws: dict, colorbar keyword arguments
    - figsize: tuple, figure size

    Returns:
    - fig: matplotlib figure object
    - ax: matplotlib axis object

    NOTE(review): the published mlxtend signature uses different keyword
    names (`matrix`, `row_names`, `column_names`, `colorbar`, `cell_fmt`,
    ...) -- verify against the installed version.
    """

def stacked_barplot(df, bar_names=None, figsize=(8, 5), n_legend_cols=1,
                   legend_loc='best'):
    """
    Create stacked bar plot from DataFrame.

    Parameters:
    - df: DataFrame, data with categories as columns
    - bar_names: list, names for bars (uses index if None)
    - figsize: tuple, figure size
    - n_legend_cols: int, number of legend columns
    - legend_loc: str, legend location

    Returns:
    - fig: matplotlib figure object
    - ax: matplotlib axis object

    NOTE(review): the published mlxtend signature is
    `stacked_barplot(df, bar_width, colors, labels, rotation, legend_loc)`
    and is documented as returning only the figure -- verify against the
    installed version.
    """

def enrichment_plot(df, colors='bgrkcy', markers=' ', linestyles='-',
                   alpha=1.0, lw=2, where='post', grid=True, count_label='Count',
                   xlim=None, ylim=None, invert_axes=False, legend_loc='best'):
    """
    Create enrichment plot for feature analysis.

    Parameters:
    - df: DataFrame, enrichment data
    - colors: str, color sequence (one single-letter color code per line)
    - markers: str, marker sequence
    - linestyles: str, line style sequence
    - alpha: float, line transparency
    - lw: float, line width
    - where: str, step plot style
    - grid: bool, show grid
    - count_label: str, y-axis label
    - xlim: tuple, x-axis limits
    - ylim: tuple, y-axis limits
    - invert_axes: bool, swap x and y axes
    - legend_loc: str, legend location

    Returns:
    - ax: matplotlib axis object

    NOTE(review): the published mlxtend defaults appear to be alpha=0.5
    and xlim/ylim='auto', with an additional `ax` parameter -- verify
    against the installed version.
    """

def checkerboard_plot(ary, fmt='%.1f', figsize=None, cbar=False, cmap=None,
                     labels_x=None, labels_y=None, fontsize_data=12):
    """
    Create checkerboard-style matrix plot.

    Parameters:
    - ary: array-like, 2D data matrix
    - fmt: str, number format string for the cell values
    - figsize: tuple, figure size
    - cbar: bool, show colorbar
    - cmap: str, colormap name
    - labels_x: list, x-axis labels
    - labels_y: list, y-axis labels
    - fontsize_data: int, font size for data values

    Returns:
    - fig: matplotlib figure object
    - ax: matplotlib axis object

    NOTE(review): the published mlxtend signature uses `cell_colors`,
    `font_colors`, `row_labels`, `col_labels` and `fontsize` instead of
    `cbar`, `cmap`, `labels_x`, `labels_y` and `fontsize_data` -- verify
    against the installed version.
    """

def ecdf(x, y_label='ECDF', x_label=None, ax=None, percentile=None, **kwargs):
    """
    Plot empirical cumulative distribution function.

    Parameters:
    - x: array-like, data values
    - y_label: str, y-axis label
    - x_label: str, x-axis label
    - ax: matplotlib axis, axis to plot on
    - percentile: float, percentile line to highlight
    - kwargs: additional plot parameters

    Returns:
    - ax: matplotlib axis object
    - (percentile_val, percentile_prob): tuple if percentile specified

    NOTE(review): the exact return shape is ambiguous here. The published
    mlxtend function is documented as always returning three values,
    (ax, percentile_threshold, percentile_count), and as taking explicit
    styling keywords rather than **kwargs -- verify against the installed
    version before unpacking the result.
    """

def scatterplotmatrix(X, names=None, figsize=(8, 8), alpha=1.0, **kwargs):
    """
    Create scatter plot matrix for multiple variables.

    Parameters:
    - X: array-like, feature matrix
    - names: list, variable names
    - figsize: tuple, figure size
    - alpha: float, point transparency
    - kwargs: additional scatter plot parameters

    Returns:
    - fig: matplotlib figure object
    - axes: array of axis objects

    NOTE(review): the published mlxtend signature also accepts a
    `fig_axes` tuple as second positional parameter (before `names`) --
    pass `names` by keyword and verify against the installed version.
    """

def plot_pca_correlation_graph(X, variables_names, dimensions=(1, 2),
                              figsize=(10, 8), X_pca=None, explained_variance=None):
    """
    Plot PCA correlation graph showing variable relationships.

    Parameters:
    - X: array-like, original feature matrix
    - variables_names: list, variable names
    - dimensions: tuple, PCA dimensions to plot (1-based, per the default)
    - figsize: tuple, figure size
    - X_pca: array-like, pre-computed PCA transform
    - explained_variance: array-like, explained variance ratios

    Returns:
    - fig: matplotlib figure object
    - ax: matplotlib axis object

    NOTE(review): the published mlxtend function takes a scalar
    `figure_axis_size` instead of `figsize` and is documented as returning
    (figure, correlation_matrix) -- verify against the installed version.
    """

def scatter_hist(x, y, hist_bins=20, hist_range=None, alpha=0.5, 
                scatter_kwargs=None, hist_kwargs=None, figsize=(5, 5)):
    """
    Create scatter plot with marginal histograms.

    Parameters:
    - x: array-like, x-axis data
    - y: array-like, y-axis data
    - hist_bins: int, number of histogram bins
    - hist_range: tuple, histogram range
    - alpha: float, histogram transparency
    - scatter_kwargs: dict, scatter plot parameters
    - hist_kwargs: dict, histogram parameters
    - figsize: tuple, figure size

    Returns:
    - fig: matplotlib figure object
    - axes: dict of axis objects {'scatter', 'hist_x', 'hist_y'}

    NOTE(review): the published mlxtend signature is
    `scatter_hist(x, y, xlabel=None, ylabel=None, figsize=(5, 5))` and is
    documented as returning a single figure object -- verify against the
    installed version.
    """

def remove_borders(axes=None):
    """
    Remove borders and spines from matplotlib plots.

    Parameters:
    - axes: matplotlib axis or list of axes, axes to modify

    No return value is documented; the axes are presumably modified in place.

    NOTE(review): the published mlxtend signature additionally takes
    `left`, `bottom`, `right` and `top` flags selecting which spines to
    remove -- verify against the installed version.
    """

Usage Examples

Decision Regions Example

from mlxtend.plotting import plot_decision_regions
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt

# Create 2D dataset
# (exactly two informative features and no redundant ones, so both columns
# of X can be plotted directly without filler values)
X, y = make_classification(n_samples=300, n_features=2, n_redundant=0, 
                         n_informative=2, random_state=42, n_clusters_per_class=1)

# Train classifier
# (fitted before plotting; plot_decision_regions only needs its predict method)
clf = RandomForestClassifier(random_state=42)
clf.fit(X, y)

# Plot decision regions
# legend=2 places the legend in the upper left corner
plot_decision_regions(X, y, clf=clf, legend=2)
plt.title('Random Forest Decision Regions')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()

Learning Curves Example

from mlxtend.plotting import plot_learning_curves
from sklearn.svm import SVC
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Create dataset
# (70/30 train/test split with a fixed seed for reproducibility)
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create classifier
# (left unfitted here; it is handed to plot_learning_curves together with
# both splits -- presumably the function fits it itself, confirm)
clf = SVC(random_state=42)

# Plot learning curves
# scoring='accuracy' overrides the default 'misclassification error' metric
# and style='ggplot' overrides the default 'fivethirtyeight' style
train_scores, test_scores = plot_learning_curves(
    X_train, y_train, X_test, y_test, clf, 
    scoring='accuracy', style='ggplot'
)
plt.title('SVM Learning Curves')
plt.show()

Sequential Feature Selection Visualization

from mlxtend.feature_selection import SequentialFeatureSelector
from mlxtend.plotting import plot_sequential_feature_selection
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt

# Create dataset
X, y = make_classification(n_samples=500, n_features=15, random_state=42)

# Perform sequential feature selection
# (forward selection targeting 8 of the 15 features, scored by accuracy
# with cv=5; this refits the classifier many times, so it can be slow)
clf = RandomForestClassifier(random_state=42)
sfs = SequentialFeatureSelector(clf, k_features=8, forward=True, 
                               scoring='accuracy', cv=5)
sfs.fit(X, y)

# Plot results
# get_metric_dict() supplies the metric_dict argument; kind='std_dev' is
# the default, spelled out here for clarity
plot_sequential_feature_selection(sfs.get_metric_dict(), kind='std_dev')
plt.title('Sequential Feature Selection Results')
plt.show()

Confusion Matrix Example

from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Create dataset and train classifier
# n_informative=3 is required for a 3-class problem: make_classification
# raises ValueError when n_classes * n_clusters_per_class (3 * 2 = 6)
# exceeds 2 ** n_informative, and the default n_informative=2 allows only 4.
X, y = make_classification(n_samples=1000, n_features=20, n_informative=3,
                           n_classes=3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Create and plot confusion matrix
# (scikit-learn puts true labels on rows and predicted labels on columns;
# class_names must follow the sorted label order 0, 1, 2)
cm = confusion_matrix(y_test, y_pred)
plot_confusion_matrix(cm, class_names=['Class 0', 'Class 1', 'Class 2'], 
                     show_normed=True, colorbar=True)
plt.title('Confusion Matrix')
plt.show()

Install with Tessl CLI