
Metrics and Visualization

This document covers the performance evaluation metrics, scoring functions, and visualization displays available in scikit-learn's sklearn.metrics module.

Classification Metrics

Accuracy and Basic Metrics

accuracy_score { .api }

from sklearn.metrics import accuracy_score

accuracy_score(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    normalize: bool = True,
    sample_weight: ArrayLike | None = None
) -> float

Accuracy classification score.

balanced_accuracy_score { .api }

from sklearn.metrics import balanced_accuracy_score

balanced_accuracy_score(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: ArrayLike | None = None,
    adjusted: bool = False
) -> float

Compute the balanced accuracy.

top_k_accuracy_score { .api }

from sklearn.metrics import top_k_accuracy_score

top_k_accuracy_score(
    y_true: ArrayLike,
    y_score: ArrayLike,
    k: int = 2,
    normalize: bool = True,
    sample_weight: ArrayLike | None = None,
    labels: ArrayLike | None = None
) -> float

Top-k Accuracy classification score.
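
As a quick illustration, a prediction counts as correct when the true class is among the k highest-scoring classes. The scores below are made-up values, not output from a fitted model:

import numpy as np
from sklearn.metrics import top_k_accuracy_score

y_true = np.array([0, 1, 2, 2])
# One column of scores per class; higher means more likely
y_score = np.array([
    [0.5, 0.2, 0.2],
    [0.3, 0.4, 0.2],
    [0.2, 0.4, 0.3],
    [0.7, 0.2, 0.1],
])

print(top_k_accuracy_score(y_true, y_score, k=2))  # 0.75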

Precision, Recall, and F-scores

precision_score { .api }

from sklearn.metrics import precision_score

precision_score(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    labels: ArrayLike | None = None,
    pos_label: str | int = 1,
    average: str | None = "binary",
    sample_weight: ArrayLike | None = None,
    zero_division: str | int = "warn"
) -> float | ArrayLike

Compute the precision.

recall_score { .api }

from sklearn.metrics import recall_score

recall_score(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    labels: ArrayLike | None = None,
    pos_label: str | int = 1,
    average: str | None = "binary",
    sample_weight: ArrayLike | None = None,
    zero_division: str | int = "warn"
) -> float | ArrayLike

Compute the recall.

f1_score { .api }

from sklearn.metrics import f1_score

f1_score(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    labels: ArrayLike | None = None,
    pos_label: str | int = 1,
    average: str | None = "binary",
    sample_weight: ArrayLike | None = None,
    zero_division: str | int = "warn"
) -> float | ArrayLike

Compute the F1 score, the harmonic mean of precision and recall.

fbeta_score { .api }

from sklearn.metrics import fbeta_score

fbeta_score(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    beta: float,
    labels: ArrayLike | None = None,
    pos_label: str | int = 1,
    average: str | None = "binary",
    sample_weight: ArrayLike | None = None,
    zero_division: str | int = "warn"
) -> float | ArrayLike

Compute the F-beta score.

precision_recall_fscore_support { .api }

from sklearn.metrics import precision_recall_fscore_support

precision_recall_fscore_support(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    beta: float = 1.0,
    labels: ArrayLike | None = None,
    pos_label: str | int = 1,
    average: str | None = None,
    warn_for: tuple = ("precision", "recall", "f-score"),
    sample_weight: ArrayLike | None = None,
    zero_division: str | int = "warn"
) -> tuple[ArrayLike, ArrayLike, ArrayLike, ArrayLike]

Compute precision, recall, F-measure and support for each class.
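
With average=None (the default), each returned array holds one value per class, which makes it easy to spot a weak class. A minimal sketch with illustrative labels:

from sklearn.metrics import precision_recall_fscore_support

y_true = [0, 1, 1, 2, 2, 2]
y_pred = [0, 1, 2, 2, 2, 1]

# Each array has one entry per class, in sorted label order
precision, recall, fscore, support = precision_recall_fscore_support(y_true, y_pred)
print(precision, recall, fscore, support)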

Confusion Matrix and Classification Report

confusion_matrix { .api }

from sklearn.metrics import confusion_matrix

confusion_matrix(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    labels: ArrayLike | None = None,
    sample_weight: ArrayLike | None = None,
    normalize: str | None = None
) -> ArrayLike

Compute confusion matrix to evaluate the accuracy of a classification.

classification_report { .api }

from sklearn.metrics import classification_report

classification_report(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    labels: ArrayLike | None = None,
    target_names: list[str] | None = None,
    sample_weight: ArrayLike | None = None,
    digits: int = 2,
    output_dict: bool = False,
    zero_division: str | int = "warn"
) -> str | dict

Build a text report showing the main classification metrics.

ROC and AUC Metrics

roc_auc_score { .api }

from sklearn.metrics import roc_auc_score

roc_auc_score(
    y_true: ArrayLike,
    y_score: ArrayLike,
    average: str | None = "macro",
    sample_weight: ArrayLike | None = None,
    max_fpr: float | None = None,
    multi_class: str = "raise",
    labels: ArrayLike | None = None
) -> float

Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC).

roc_curve { .api }

from sklearn.metrics import roc_curve

roc_curve(
    y_true: ArrayLike,
    y_score: ArrayLike,
    pos_label: int | str | None = None,
    sample_weight: ArrayLike | None = None,
    drop_intermediate: bool = True
) -> tuple[ArrayLike, ArrayLike, ArrayLike]

Compute Receiver operating characteristic (ROC).

auc { .api }

from sklearn.metrics import auc

auc(
    x: ArrayLike,
    y: ArrayLike
) -> float

Compute Area Under the Curve (AUC) using the trapezoidal rule.
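
auc is a general trapezoidal integrator, so it composes with roc_curve; roc_auc_score computes the same value in one call. A small sketch with illustrative labels and scores:

from sklearn.metrics import auc, roc_curve

y_true = [0, 0, 1, 1]
y_score = [0.1, 0.4, 0.35, 0.8]

fpr, tpr, _ = roc_curve(y_true, y_score)
print(auc(fpr, tpr))  # 0.75, same as roc_auc_score(y_true, y_score)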

Precision-Recall Metrics

average_precision_score { .api }

from sklearn.metrics import average_precision_score

average_precision_score(
    y_true: ArrayLike,
    y_score: ArrayLike,
    average: str | None = "macro",
    pos_label: int | str = 1,
    sample_weight: ArrayLike | None = None
) -> float

Compute average precision (AP) from prediction scores.

precision_recall_curve { .api }

from sklearn.metrics import precision_recall_curve

precision_recall_curve(
    y_true: ArrayLike,
    probas_pred: ArrayLike,
    pos_label: int | str | None = None,
    sample_weight: ArrayLike | None = None
) -> tuple[ArrayLike, ArrayLike, ArrayLike]

Compute precision-recall pairs for different probability thresholds.
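
A minimal sketch pairing the curve with average_precision_score, which summarizes it as a single number; the labels and scores are illustrative:

from sklearn.metrics import average_precision_score, precision_recall_curve

y_true = [0, 0, 1, 1]
y_score = [0.1, 0.4, 0.35, 0.8]

# thresholds has one entry fewer than precision and recall
precision, recall, thresholds = precision_recall_curve(y_true, y_score)
print(average_precision_score(y_true, y_score))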

Loss Functions

log_loss { .api }

from sklearn.metrics import log_loss

log_loss(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    eps: float | str = "auto",
    normalize: bool = True,
    sample_weight: ArrayLike | None = None,
    labels: ArrayLike | None = None
) -> float

Log loss, aka logistic loss or cross-entropy loss.
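
Unlike the metrics above, log_loss expects predicted probabilities rather than hard labels. A minimal sketch with made-up probabilities:

from sklearn.metrics import log_loss

y_true = [0, 1, 1, 0]
# One row per sample, one column per class; rows sum to 1
y_proba = [[0.9, 0.1],
           [0.2, 0.8],
           [0.3, 0.7],
           [0.6, 0.4]]

print(log_loss(y_true, y_proba))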

hinge_loss { .api }

from sklearn.metrics import hinge_loss

hinge_loss(
    y_true: ArrayLike,
    pred_decision: ArrayLike,
    labels: ArrayLike | None = None,
    sample_weight: ArrayLike | None = None
) -> float

Average hinge loss (non-regularized).

brier_score_loss { .api }

from sklearn.metrics import brier_score_loss

brier_score_loss(
    y_true: ArrayLike,
    y_prob: ArrayLike,
    sample_weight: ArrayLike | None = None,
    pos_label: int | str | None = None
) -> float

Compute the Brier score loss.

hamming_loss { .api }

from sklearn.metrics import hamming_loss

hamming_loss(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: ArrayLike | None = None
) -> float

Compute the average Hamming loss.

jaccard_score { .api }

from sklearn.metrics import jaccard_score

jaccard_score(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    labels: ArrayLike | None = None,
    pos_label: str | int = 1,
    average: str | None = "binary",
    sample_weight: ArrayLike | None = None,
    zero_division: str | int = "warn"
) -> float | ArrayLike

Jaccard similarity coefficient score.

zero_one_loss { .api }

from sklearn.metrics import zero_one_loss

zero_one_loss(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    normalize: bool = True,
    sample_weight: ArrayLike | None = None
) -> float

Zero-one classification loss.

Statistical Measures

matthews_corrcoef { .api }

from sklearn.metrics import matthews_corrcoef

matthews_corrcoef(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: ArrayLike | None = None
) -> float

Compute the Matthews correlation coefficient (MCC).

cohen_kappa_score { .api }

from sklearn.metrics import cohen_kappa_score

cohen_kappa_score(
    y1: ArrayLike,
    y2: ArrayLike,
    labels: ArrayLike | None = None,
    weights: str | None = None,
    sample_weight: ArrayLike | None = None
) -> float

Compute Cohen's kappa: a statistic that measures inter-annotator agreement.

class_likelihood_ratios { .api }

from sklearn.metrics import class_likelihood_ratios

class_likelihood_ratios(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    labels: ArrayLike | None = None,
    sample_weight: ArrayLike | None = None
) -> tuple[float, float]

Compute binary classification positive and negative likelihood ratios.

Detection Error Tradeoff

det_curve { .api }

from sklearn.metrics import det_curve

det_curve(
    y_true: ArrayLike,
    y_score: ArrayLike,
    pos_label: int | str | None = None,
    sample_weight: ArrayLike | None = None
) -> tuple[ArrayLike, ArrayLike, ArrayLike]

Compute error rates for different probability thresholds.

Regression Metrics

Basic Regression Metrics

mean_squared_error { .api }

from sklearn.metrics import mean_squared_error

mean_squared_error(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: ArrayLike | None = None,
    multioutput: str | ArrayLike = "uniform_average",
    squared: bool = True
) -> float | ArrayLike

Mean squared error regression loss.

mean_absolute_error { .api }

from sklearn.metrics import mean_absolute_error

mean_absolute_error(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: ArrayLike | None = None,
    multioutput: str | ArrayLike = "uniform_average"
) -> float | ArrayLike

Mean absolute error regression loss.

mean_absolute_percentage_error { .api }

from sklearn.metrics import mean_absolute_percentage_error

mean_absolute_percentage_error(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: ArrayLike | None = None,
    multioutput: str | ArrayLike = "uniform_average"
) -> float | ArrayLike

Mean absolute percentage error (MAPE) regression loss.

median_absolute_error { .api }

from sklearn.metrics import median_absolute_error

median_absolute_error(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    multioutput: str | ArrayLike = "uniform_average",
    sample_weight: ArrayLike | None = None
) -> float | ArrayLike

Median absolute error regression loss.

max_error { .api }

from sklearn.metrics import max_error

max_error(
    y_true: ArrayLike,
    y_pred: ArrayLike
) -> float

Compute the maximum residual error.

mean_squared_log_error { .api }

from sklearn.metrics import mean_squared_log_error

mean_squared_log_error(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: ArrayLike | None = None,
    multioutput: str | ArrayLike = "uniform_average"
) -> float | ArrayLike

Mean squared logarithmic error regression loss.

mean_poisson_deviance { .api }

from sklearn.metrics import mean_poisson_deviance

mean_poisson_deviance(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: ArrayLike | None = None
) -> float

Mean Poisson deviance regression loss.

mean_gamma_deviance { .api }

from sklearn.metrics import mean_gamma_deviance

mean_gamma_deviance(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: ArrayLike | None = None
) -> float

Mean Gamma deviance regression loss.

mean_tweedie_deviance { .api }

from sklearn.metrics import mean_tweedie_deviance

mean_tweedie_deviance(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: ArrayLike | None = None,
    power: float = 0
) -> float

Mean Tweedie deviance regression loss.
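
The power parameter selects the distribution family: power=0 recovers mean squared error, power=1 the Poisson deviance, and power=2 the Gamma deviance. A quick sketch with illustrative positive targets:

from sklearn.metrics import mean_tweedie_deviance

y_true = [2.0, 1.0, 4.0, 3.0]
y_pred = [2.5, 0.8, 3.5, 3.0]

print(mean_tweedie_deviance(y_true, y_pred, power=0))  # equals mean squared error
print(mean_tweedie_deviance(y_true, y_pred, power=1))  # Poisson deviance
print(mean_tweedie_deviance(y_true, y_pred, power=2))  # Gamma deviance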

Coefficient of Determination

r2_score { .api }

from sklearn.metrics import r2_score

r2_score(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: ArrayLike | None = None,
    multioutput: str | ArrayLike | None = "uniform_average",
    force_finite: bool = True
) -> float | ArrayLike

R² (coefficient of determination) regression score function.

explained_variance_score { .api }

from sklearn.metrics import explained_variance_score

explained_variance_score(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: ArrayLike | None = None,
    multioutput: str | ArrayLike = "uniform_average",
    force_finite: bool = True
) -> float | ArrayLike

Explained variance regression score function.

D² Scores

d2_absolute_error_score { .api }

from sklearn.metrics import d2_absolute_error_score

d2_absolute_error_score(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: ArrayLike | None = None,
    multioutput: str | ArrayLike = "uniform_average"
) -> float | ArrayLike

Compute the D² absolute error score.

d2_log_loss_score { .api }

from sklearn.metrics import d2_log_loss_score

d2_log_loss_score(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: ArrayLike | None = None,
    labels: ArrayLike | None = None
) -> float

Compute the D² log loss score.

d2_pinball_score { .api }

from sklearn.metrics import d2_pinball_score

d2_pinball_score(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: ArrayLike | None = None,
    alpha: float = 0.5,
    multioutput: str | ArrayLike = "uniform_average"
) -> float | ArrayLike

Compute the D² pinball score.

d2_tweedie_score { .api }

from sklearn.metrics import d2_tweedie_score

d2_tweedie_score(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: ArrayLike | None = None,
    power: float = 0,
    multioutput: str | ArrayLike = "uniform_average"
) -> float | ArrayLike

Compute the D² Tweedie score.

Quantile Loss

mean_pinball_loss { .api }

from sklearn.metrics import mean_pinball_loss

mean_pinball_loss(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: ArrayLike | None = None,
    alpha: float = 0.5,
    multioutput: str | ArrayLike = "uniform_average"
) -> float | ArrayLike

Pinball loss for quantile regression.
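
alpha selects the target quantile: alpha=0.5 gives half the mean absolute error, while larger values penalize under-prediction more heavily. A minimal sketch with illustrative values:

from sklearn.metrics import mean_pinball_loss

y_true = [1.0, 2.0, 3.0, 4.0]
y_pred = [1.5, 2.0, 2.5, 3.0]

print(mean_pinball_loss(y_true, y_pred, alpha=0.5))  # half the mean absolute error
print(mean_pinball_loss(y_true, y_pred, alpha=0.9))  # penalizes under-prediction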

Clustering Metrics

Internal Validation

silhouette_score { .api }

from sklearn.metrics import silhouette_score

silhouette_score(
    X: ArrayLike,
    labels: ArrayLike,
    metric: str | Callable = "euclidean",
    sample_size: int | None = None,
    random_state: int | RandomState | None = None,
    **kwds
) -> float

Compute the mean Silhouette Coefficient of all samples.

silhouette_samples { .api }

from sklearn.metrics import silhouette_samples

silhouette_samples(
    X: ArrayLike,
    labels: ArrayLike,
    metric: str | Callable = "euclidean",
    **kwds
) -> ArrayLike

Compute the Silhouette Coefficient for each sample.

calinski_harabasz_score { .api }

from sklearn.metrics import calinski_harabasz_score

calinski_harabasz_score(
    X: ArrayLike,
    labels: ArrayLike
) -> float

Compute the Calinski and Harabasz score.

davies_bouldin_score { .api }

from sklearn.metrics import davies_bouldin_score

davies_bouldin_score(
    X: ArrayLike,
    labels: ArrayLike
) -> float

Compute the Davies-Bouldin score.

External Validation

adjusted_rand_score { .api }

from sklearn.metrics import adjusted_rand_score

adjusted_rand_score(
    labels_true: ArrayLike,
    labels_pred: ArrayLike
) -> float

Rand index adjusted for chance.

rand_score { .api }

from sklearn.metrics import rand_score

rand_score(
    labels_true: ArrayLike,
    labels_pred: ArrayLike
) -> float

Rand index.

adjusted_mutual_info_score { .api }

from sklearn.metrics import adjusted_mutual_info_score

adjusted_mutual_info_score(
    labels_true: ArrayLike,
    labels_pred: ArrayLike,
    average_method: str = "arithmetic"
) -> float

Adjusted Mutual Information between two clusterings.

normalized_mutual_info_score { .api }

from sklearn.metrics import normalized_mutual_info_score

normalized_mutual_info_score(
    labels_true: ArrayLike,
    labels_pred: ArrayLike,
    average_method: str = "arithmetic"
) -> float

Normalized Mutual Information between two clusterings.

mutual_info_score { .api }

from sklearn.metrics import mutual_info_score

mutual_info_score(
    labels_true: ArrayLike,
    labels_pred: ArrayLike,
    contingency: ArrayLike | None = None
) -> float

Mutual Information between two clusterings.

fowlkes_mallows_score { .api }

from sklearn.metrics import fowlkes_mallows_score

fowlkes_mallows_score(
    labels_true: ArrayLike,
    labels_pred: ArrayLike,
    sparse: bool = False
) -> float

Measure the similarity of two clusterings of a set of points.

homogeneity_score { .api }

from sklearn.metrics import homogeneity_score

homogeneity_score(
    labels_true: ArrayLike,
    labels_pred: ArrayLike
) -> float

Homogeneity metric of a cluster labeling given a ground truth.

completeness_score { .api }

from sklearn.metrics import completeness_score

completeness_score(
    labels_true: ArrayLike,
    labels_pred: ArrayLike
) -> float

Compute completeness metric of a cluster labeling.

v_measure_score { .api }

from sklearn.metrics import v_measure_score

v_measure_score(
    labels_true: ArrayLike,
    labels_pred: ArrayLike,
    beta: float = 1.0
) -> float

Compute the V-measure of a cluster labeling given a ground truth.

homogeneity_completeness_v_measure { .api }

from sklearn.metrics import homogeneity_completeness_v_measure

homogeneity_completeness_v_measure(
    labels_true: ArrayLike,
    labels_pred: ArrayLike,
    beta: float = 1.0
) -> tuple[float, float, float]

Compute the homogeneity, completeness, and V-measure scores at once.

Biclustering Metrics

consensus_score { .api }

from sklearn.metrics import consensus_score

consensus_score(
    a: tuple[ArrayLike, ArrayLike],
    b: tuple[ArrayLike, ArrayLike],
    similarity: str | Callable = "jaccard"
) -> float

Compute the similarity of two sets of biclusters.

Multilabel Metrics

coverage_error { .api }

from sklearn.metrics import coverage_error

coverage_error(
    y_true: ArrayLike,
    y_score: ArrayLike,
    sample_weight: ArrayLike | None = None
) -> float

Coverage error measure.

label_ranking_average_precision_score { .api }

from sklearn.metrics import label_ranking_average_precision_score

label_ranking_average_precision_score(
    y_true: ArrayLike,
    y_score: ArrayLike,
    sample_weight: ArrayLike | None = None
) -> float

Compute ranking-based average precision.

label_ranking_loss { .api }

from sklearn.metrics import label_ranking_loss

label_ranking_loss(
    y_true: ArrayLike,
    y_score: ArrayLike,
    sample_weight: ArrayLike | None = None
) -> float

Compute Ranking loss measure.
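
All three ranking metrics take a binary indicator matrix of true labels and a matrix of per-label scores. A small sketch with illustrative values:

import numpy as np
from sklearn.metrics import (
    coverage_error, label_ranking_average_precision_score, label_ranking_loss
)

y_true = np.array([[1, 0, 0], [0, 0, 1]])
y_score = np.array([[0.75, 0.5, 1.0], [1.0, 0.2, 0.1]])

print(coverage_error(y_true, y_score))
print(label_ranking_average_precision_score(y_true, y_score))
print(label_ranking_loss(y_true, y_score))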

Distance Metrics

pairwise_distances { .api }

from sklearn.metrics import pairwise_distances

pairwise_distances(
    X: ArrayLike,
    Y: ArrayLike | None = None,
    metric: str | Callable = "euclidean",
    n_jobs: int | None = None,
    force_all_finite: bool | str = True,
    **kwds
) -> ArrayLike

Compute the distance matrix from a vector array X and optional Y.
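
A minimal sketch of computing a square distance matrix; any metric name accepted by scikit-learn's pairwise metrics (or scipy.spatial.distance) can be passed:

import numpy as np
from sklearn.metrics import pairwise_distances

X = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]])

# With Y omitted, distances are computed between all pairs of rows in X
D = pairwise_distances(X, metric="manhattan")
print(D.shape)  # (3, 3)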

euclidean_distances { .api }

from sklearn.metrics import euclidean_distances

euclidean_distances(
    X: ArrayLike,
    Y: ArrayLike | None = None,
    Y_norm_squared: ArrayLike | None = None,
    squared: bool = False,
    X_norm_squared: ArrayLike | None = None
) -> ArrayLike

Compute the distance matrix between each pair from a vector array X.

manhattan_distances { .api }

from sklearn.metrics import manhattan_distances

manhattan_distances(
    X: ArrayLike,
    Y: ArrayLike | None = None,
    sum_over_features: bool = True
) -> ArrayLike

Compute the L1 distances between the vectors in X and Y.

cosine_distances { .api }

from sklearn.metrics import cosine_distances

cosine_distances(
    X: ArrayLike,
    Y: ArrayLike | None = None
) -> ArrayLike

Compute cosine distance between samples in X and Y.

haversine_distances { .api }

from sklearn.metrics import haversine_distances

haversine_distances(
    X: ArrayLike,
    Y: ArrayLike | None = None
) -> ArrayLike

Compute the Haversine distance between samples in X and Y.

Similarity Metrics

cosine_similarity { .api }

from sklearn.metrics import cosine_similarity

cosine_similarity(
    X: ArrayLike,
    Y: ArrayLike | None = None,
    dense_output: bool = True
) -> ArrayLike

Compute cosine similarity between samples in X and Y.

linear_kernel { .api }

from sklearn.metrics import linear_kernel

linear_kernel(
    X: ArrayLike,
    Y: ArrayLike | None = None,
    dense_output: bool = True
) -> ArrayLike

Compute the linear kernel between X and Y.

polynomial_kernel { .api }

from sklearn.metrics import polynomial_kernel

polynomial_kernel(
    X: ArrayLike,
    Y: ArrayLike | None = None,
    degree: int = 3,
    gamma: float | None = None,
    coef0: float = 1,
    dense_output: bool = True
) -> ArrayLike

Compute the polynomial kernel between X and Y.

rbf_kernel { .api }

from sklearn.metrics import rbf_kernel

rbf_kernel(
    X: ArrayLike,
    Y: ArrayLike | None = None,
    gamma: float | None = None,
    dense_output: bool = True
) -> ArrayLike

Compute the RBF (Gaussian) kernel between X and Y.

sigmoid_kernel { .api }

from sklearn.metrics import sigmoid_kernel

sigmoid_kernel(
    X: ArrayLike,
    Y: ArrayLike | None = None,
    gamma: float | None = None,
    coef0: float = 1,
    dense_output: bool = True
) -> ArrayLike

Compute the sigmoid kernel between X and Y.

laplacian_kernel { .api }

from sklearn.metrics import laplacian_kernel

laplacian_kernel(
    X: ArrayLike,
    Y: ArrayLike | None = None,
    gamma: float | None = None,
    dense_output: bool = True
) -> ArrayLike

Compute the Laplacian kernel between X and Y.

chi2_kernel { .api }

from sklearn.metrics import chi2_kernel

chi2_kernel(
    X: ArrayLike,
    Y: ArrayLike | None = None,
    gamma: float = 1.0,
    dense_output: bool = True
) -> ArrayLike

Compute the exponential chi-squared kernel between X and Y.

additive_chi2_kernel { .api }

from sklearn.metrics import additive_chi2_kernel

additive_chi2_kernel(
    X: ArrayLike,
    Y: ArrayLike | None = None
) -> ArrayLike

Compute the additive chi-squared kernel between observations in X and Y.

Distance Metric Class

DistanceMetric { .api }

from sklearn.metrics import DistanceMetric

DistanceMetric.get_metric(
    metric: str,
    **kwargs
) -> DistanceMetric

Uniform interface for fast distance metric computations. Instances are obtained through the get_metric class method rather than by calling the constructor directly.
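
A minimal sketch of computing pairwise Euclidean distances through this interface:

import numpy as np
from sklearn.metrics import DistanceMetric

X = np.array([[0.0, 0.0], [1.0, 1.0]])

dist = DistanceMetric.get_metric("euclidean")
print(dist.pairwise(X))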

Ranking Metrics

dcg_score { .api }

from sklearn.metrics import dcg_score

dcg_score(
    y_true: ArrayLike,
    y_score: ArrayLike,
    k: int | None = None,
    log_base: float = 2,
    sample_weight: ArrayLike | None = None,
    ignore_ties: bool = False
) -> float

Compute Discounted Cumulative Gain.

ndcg_score { .api }

from sklearn.metrics import ndcg_score

ndcg_score(
    y_true: ArrayLike,
    y_score: ArrayLike,
    k: int | None = None,
    sample_weight: ArrayLike | None = None,
    ignore_ties: bool = False
) -> float

Compute Normalized Discounted Cumulative Gain.
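
Both DCG functions take 2-D arrays with one row per query. A minimal sketch with made-up relevance grades and scores:

import numpy as np
from sklearn.metrics import dcg_score, ndcg_score

# One query: true relevance grades and predicted scores for four documents
y_true = np.array([[3, 2, 0, 1]])
y_score = np.array([[0.9, 0.1, 0.4, 0.7]])

print(dcg_score(y_true, y_score, k=3))
print(ndcg_score(y_true, y_score, k=3))  # normalized to [0, 1]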

Scoring and Model Evaluation

Scoring Functions

make_scorer { .api }

from sklearn.metrics import make_scorer

make_scorer(
    score_func: Callable,
    greater_is_better: bool = True,
    needs_proba: bool = False,
    needs_threshold: bool = False,
    **kwargs
) -> Callable

Make a scorer from a performance metric or loss function.

get_scorer { .api }

from sklearn.metrics import get_scorer

get_scorer(
    scoring: str
) -> Callable

Get a scorer from string.

get_scorer_names { .api }

from sklearn.metrics import get_scorer_names

get_scorer_names() -> list[str]

Get the names of all available scorers.
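
A quick sketch of looking up built-in scorers by name; the returned names are the strings accepted by the scoring parameter of cross_val_score and GridSearchCV:

from sklearn.metrics import get_scorer, get_scorer_names

print(sorted(get_scorer_names())[:5])

# A scorer wraps a metric with the convention that greater is better,
# so loss-based scorers are negated (e.g. "neg_mean_squared_error")
scorer = get_scorer("neg_mean_squared_error")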

check_scoring { .api }

from sklearn.metrics import check_scoring

check_scoring(
    estimator: BaseEstimator,
    scoring: str | Callable | None = None,
    allow_none: bool = False
) -> Callable | None

Determine scorer from user options.

Visualization and Display Classes

Classification Displays

ConfusionMatrixDisplay { .api }

from sklearn.metrics import ConfusionMatrixDisplay

ConfusionMatrixDisplay(
    confusion_matrix: ArrayLike,
    display_labels: ArrayLike | None = None
)

Confusion Matrix visualization.
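
Besides the constructor, the display provides from_estimator and from_predictions class methods that build the matrix for you. A minimal sketch from predictions, with illustrative labels:

import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

y_true = [0, 1, 1, 0, 1, 0]
y_pred = [0, 1, 0, 0, 1, 1]

# Computes the confusion matrix internally and plots it
ConfusionMatrixDisplay.from_predictions(y_true, y_pred)
plt.show()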

RocCurveDisplay { .api }

from sklearn.metrics import RocCurveDisplay

RocCurveDisplay(
    fpr: ArrayLike,
    tpr: ArrayLike,
    roc_auc: float | None = None,
    estimator_name: str | None = None,
    pos_label: str | int | None = None
)

ROC Curve visualization.

PrecisionRecallDisplay { .api }

from sklearn.metrics import PrecisionRecallDisplay

PrecisionRecallDisplay(
    precision: ArrayLike,
    recall: ArrayLike,
    average_precision: float | None = None,
    estimator_name: str | None = None,
    pos_label: str | int | None = None
)

Precision Recall visualization.

DetCurveDisplay { .api }

from sklearn.metrics import DetCurveDisplay

DetCurveDisplay(
    fpr: ArrayLike,
    fnr: ArrayLike,
    estimator_name: str | None = None,
    pos_label: str | int | None = None
)

DET curve visualization.

Regression Displays

PredictionErrorDisplay { .api }

from sklearn.metrics import PredictionErrorDisplay

PredictionErrorDisplay(
    y_true: ArrayLike,
    y_pred: ArrayLike
)

Prediction error visualization. The kind, subsample, and random_state options are accepted by the plot method and by the from_estimator and from_predictions constructors rather than by __init__.
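
A minimal sketch built directly from predictions (from_predictions is available in recent scikit-learn releases); the values are illustrative:

import matplotlib.pyplot as plt
from sklearn.metrics import PredictionErrorDisplay

y_true = [3.0, 5.0, 7.5, 9.0]
y_pred = [2.8, 5.4, 7.0, 9.5]

# kind="residual_vs_predicted" would plot residuals instead
PredictionErrorDisplay.from_predictions(y_true, y_pred, kind="actual_vs_predicted")
plt.show()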

Examples

Classification Metrics Example

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report, roc_auc_score
)
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Generate sample data
X, y = make_classification(n_samples=1000, n_classes=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)
y_proba = clf.predict_proba(X_test)[:, 1]

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_proba)

print(f"Accuracy: {accuracy:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")
print(f"F1-score: {f1:.3f}")
print(f"ROC-AUC: {roc_auc:.3f}")

# Confusion matrix
cm = confusion_matrix(y_test, y_pred)
print(f"\nConfusion Matrix:\n{cm}")

# Classification report
report = classification_report(y_test, y_pred)
print(f"\nClassification Report:\n{report}")

Regression Metrics Example

from sklearn.metrics import (
    mean_squared_error, mean_absolute_error, r2_score,
    explained_variance_score, max_error
)
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Generate sample data
X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
reg = RandomForestRegressor(n_estimators=100, random_state=42)
reg.fit(X_train, y_train)

# Make predictions
y_pred = reg.predict(X_test)

# Calculate metrics
mse = mean_squared_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
ev = explained_variance_score(y_test, y_pred)
max_err = max_error(y_test, y_pred)

print(f"MSE: {mse:.3f}")
print(f"RMSE: {rmse:.3f}")
print(f"MAE: {mae:.3f}")
print(f"R² Score: {r2:.3f}")
print(f"Explained Variance: {ev:.3f}")
print(f"Max Error: {max_err:.3f}")

Clustering Metrics Example

from sklearn.metrics import (
    silhouette_score, calinski_harabasz_score, davies_bouldin_score,
    adjusted_rand_score, adjusted_mutual_info_score
)
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

# Generate sample data
X, y_true = make_blobs(n_samples=300, centers=4, n_features=2, 
                       random_state=42, cluster_std=0.8)

# Perform clustering
kmeans = KMeans(n_clusters=4, random_state=42)
y_pred = kmeans.fit_predict(X)

# Internal validation metrics
silhouette = silhouette_score(X, y_pred)
calinski_harabasz = calinski_harabasz_score(X, y_pred)
davies_bouldin = davies_bouldin_score(X, y_pred)

print(f"Silhouette Score: {silhouette:.3f}")
print(f"Calinski-Harabasz Score: {calinski_harabasz:.3f}")
print(f"Davies-Bouldin Score: {davies_bouldin:.3f}")

# External validation metrics (when true labels are available)
ari = adjusted_rand_score(y_true, y_pred)
ami = adjusted_mutual_info_score(y_true, y_pred)

print(f"Adjusted Rand Index: {ari:.3f}")
print(f"Adjusted Mutual Information: {ami:.3f}")

ROC Curve and Visualization Example

from sklearn.metrics import roc_curve, RocCurveDisplay
import matplotlib.pyplot as plt

# Calculate ROC curve (clf, X_test, y_test, y_proba, and roc_auc
# come from the classification example above)
fpr, tpr, thresholds = roc_curve(y_test, y_proba)

# Create ROC curve display
roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc)
roc_display.plot()
plt.title('ROC Curve')
plt.show()

# Or create directly from estimator
RocCurveDisplay.from_estimator(clf, X_test, y_test)
plt.show()

Custom Scorer Example

from sklearn.metrics import make_scorer
import numpy as np

# Define a custom scoring function that weights the positive class
# twice as heavily as the negative class
def custom_accuracy(y_true, y_pred):
    """Weighted accuracy that emphasizes the positive class."""
    weights = np.where(np.asarray(y_true) == 1, 2.0, 1.0)
    return np.average(np.asarray(y_true) == np.asarray(y_pred), weights=weights)

# Create scorer
custom_scorer = make_scorer(custom_accuracy, greater_is_better=True)

# Use in cross-validation or grid search (clf, X, and y come from the
# classification example above)
from sklearn.model_selection import cross_val_score
scores = cross_val_score(clf, X, y, cv=5, scoring=custom_scorer)
print(f"Custom scores: {scores}")
