A comprehensive machine learning library providing supervised and unsupervised learning algorithms with consistent APIs and extensive tools for data preprocessing, model evaluation, and deployment.
This document covers all performance evaluation metrics, scoring functions, and visualization displays in scikit-learn.
from sklearn.metrics import accuracy_score
accuracy_score(
y_true: ArrayLike,
y_pred: ArrayLike,
normalize: bool = True,
sample_weight: ArrayLike | None = None
) -> float
Accuracy classification score.
from sklearn.metrics import balanced_accuracy_score
balanced_accuracy_score(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: ArrayLike | None = None,
adjusted: bool = False
) -> float
Compute the balanced accuracy.
from sklearn.metrics import top_k_accuracy_score
top_k_accuracy_score(
y_true: ArrayLike,
y_score: ArrayLike,
k: int = 2,
normalize: bool = True,
sample_weight: ArrayLike | None = None,
labels: ArrayLike | None = None
) -> float
Top-k Accuracy classification score.
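A prediction counts as correct under top-k accuracy when the true class is among the k highest-scored classes. A minimal sketch with hand-written scores (the values are purely illustrative):

import numpy as np
from sklearn.metrics import top_k_accuracy_score

y_true = np.array([0, 1, 2])
y_score = np.array([[0.5, 0.3, 0.2],   # true class 0 ranked 1st: hit
                    [0.3, 0.4, 0.3],   # true class 1 ranked 1st: hit
                    [0.4, 0.5, 0.1]])  # true class 2 ranked 3rd: miss even at k=2
print(top_k_accuracy_score(y_true, y_score, k=2))  # 2/3 ≈ 0.667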
from sklearn.metrics import precision_score
precision_score(
y_true: ArrayLike,
y_pred: ArrayLike,
labels: ArrayLike | None = None,
pos_label: str | int = 1,
average: str | None = "binary",
sample_weight: ArrayLike | None = None,
zero_division: str | int = "warn"
) -> float | ArrayLike
Compute the precision.
from sklearn.metrics import recall_score
recall_score(
y_true: ArrayLike,
y_pred: ArrayLike,
labels: ArrayLike | None = None,
pos_label: str | int = 1,
average: str | None = "binary",
sample_weight: ArrayLike | None = None,
zero_division: str | int = "warn"
) -> float | ArrayLike
Compute the recall.
from sklearn.metrics import f1_score
f1_score(
y_true: ArrayLike,
y_pred: ArrayLike,
labels: ArrayLike | None = None,
pos_label: str | int = 1,
average: str | None = "binary",
sample_weight: ArrayLike | None = None,
zero_division: str | int = "warn"
) -> float | ArrayLike
Compute the F1 score, the harmonic mean of precision and recall.
from sklearn.metrics import fbeta_score
fbeta_score(
y_true: ArrayLike,
y_pred: ArrayLike,
beta: float,
labels: ArrayLike | None = None,
pos_label: str | int = 1,
average: str | None = "binary",
sample_weight: ArrayLike | None = None,
zero_division: str | int = "warn"
) -> float | ArrayLike
Compute the F-beta score.
from sklearn.metrics import precision_recall_fscore_support
precision_recall_fscore_support(
y_true: ArrayLike,
y_pred: ArrayLike,
beta: float = 1.0,
labels: ArrayLike | None = None,
pos_label: str | int = 1,
average: str | None = None,
warn_for: tuple = ("precision", "recall", "f-score"),
sample_weight: ArrayLike | None = None,
zero_division: str | int = "warn"
) -> tuple[ArrayLike, ArrayLike, ArrayLike, ArrayLike]
Compute precision, recall, F-measure and support for each class.
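The average parameter shared by the precision/recall/F-score family controls how per-class scores are combined: None returns one score per class, while "micro", "macro", and "weighted" reduce them to a single number. A small multiclass sketch:

from sklearn.metrics import precision_recall_fscore_support

y_true = [0, 1, 2, 0, 1, 2]
y_pred = [0, 2, 1, 0, 0, 1]
# Per-class scores plus the support (true count) of each class
p, r, f, support = precision_recall_fscore_support(
    y_true, y_pred, average=None, zero_division=0)
# Unweighted mean over classes
p_macro, r_macro, f_macro, _ = precision_recall_fscore_support(
    y_true, y_pred, average="macro", zero_division=0)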
from sklearn.metrics import confusion_matrix
confusion_matrix(
y_true: ArrayLike,
y_pred: ArrayLike,
labels: ArrayLike | None = None,
sample_weight: ArrayLike | None = None,
normalize: str | None = None
) -> ArrayLike
Compute confusion matrix to evaluate the accuracy of a classification.
from sklearn.metrics import classification_report
classification_report(
y_true: ArrayLike,
y_pred: ArrayLike,
labels: ArrayLike | None = None,
target_names: list[str] | None = None,
sample_weight: ArrayLike | None = None,
digits: int = 2,
output_dict: bool = False,
zero_division: str | int = "warn"
) -> str | dict
Build a text report showing the main classification metrics.
from sklearn.metrics import roc_auc_score
roc_auc_score(
y_true: ArrayLike,
y_score: ArrayLike,
average: str | None = "macro",
sample_weight: ArrayLike | None = None,
max_fpr: float | None = None,
multi_class: str = "raise",
labels: ArrayLike | None = None
) -> float
Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC).
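For multiclass targets, roc_auc_score needs the full probability matrix and an explicit multi_class strategy ("ovr" or "ovo"). A sketch assuming a hypothetical fitted classifier clf with predict_proba and held-out X_test, y_test:

from sklearn.metrics import roc_auc_score

proba = clf.predict_proba(X_test)  # shape (n_samples, n_classes); clf is hypothetical
auc_ovr = roc_auc_score(y_test, proba, multi_class="ovr", average="macro")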
from sklearn.metrics import roc_curve
roc_curve(
y_true: ArrayLike,
y_score: ArrayLike,
pos_label: int | str | None = None,
sample_weight: ArrayLike | None = None,
drop_intermediate: bool = True
) -> tuple[ArrayLike, ArrayLike, ArrayLike]
Compute Receiver operating characteristic (ROC).
from sklearn.metrics import auc
auc(
x: ArrayLike,
y: ArrayLike
) -> float
Compute Area Under the Curve (AUC) using the trapezoidal rule.
from sklearn.metrics import average_precision_score
average_precision_score(
y_true: ArrayLike,
y_score: ArrayLike,
average: str | None = "macro",
pos_label: int | str = 1,
sample_weight: ArrayLike | None = None
) -> float
Compute average precision (AP) from prediction scores.
from sklearn.metrics import precision_recall_curve
precision_recall_curve(
y_true: ArrayLike,
probas_pred: ArrayLike,
pos_label: int | str | None = None,
sample_weight: ArrayLike | None = None
) -> tuple[ArrayLike, ArrayLike, ArrayLike]
Compute precision-recall pairs for different probability thresholds.
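The returned arrays are often used to choose an operating threshold, e.g. the one maximizing F1. A sketch assuming binary labels y_test and positive-class scores y_proba from some fitted model (both hypothetical here):

import numpy as np
from sklearn.metrics import precision_recall_curve

precision, recall, thresholds = precision_recall_curve(y_test, y_proba)
# thresholds has one fewer element than precision and recall
f1 = 2 * precision[:-1] * recall[:-1] / (precision[:-1] + recall[:-1] + 1e-12)
best_threshold = thresholds[np.argmax(f1)]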
from sklearn.metrics import log_loss
log_loss(
y_true: ArrayLike,
y_pred: ArrayLike,
eps: float | str = "auto",
normalize: bool = True,
sample_weight: ArrayLike | None = None,
labels: ArrayLike | None = None
) -> float
Log loss, aka logistic loss or cross-entropy loss.
from sklearn.metrics import hinge_loss
hinge_loss(
y_true: ArrayLike,
pred_decision: ArrayLike,
labels: ArrayLike | None = None,
sample_weight: ArrayLike | None = None
) -> float
Average hinge loss (non-regularized).
from sklearn.metrics import brier_score_loss
brier_score_loss(
y_true: ArrayLike,
y_prob: ArrayLike,
sample_weight: ArrayLike | None = None,
pos_label: int | str | None = None
) -> float
Compute the Brier score loss.
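Both log loss and the Brier score grade predicted probabilities rather than hard labels, and lower is better for each. A minimal sketch with hand-written probabilities:

from sklearn.metrics import log_loss, brier_score_loss

y_true = [0, 1, 1, 0]
y_prob = [0.1, 0.9, 0.8, 0.3]  # predicted probability of the positive class
print(log_loss(y_true, y_prob))          # cross-entropy of the probabilities
print(brier_score_loss(y_true, y_prob))  # mean squared error of the probabilities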
from sklearn.metrics import hamming_loss
hamming_loss(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: ArrayLike | None = None
) -> float
Compute the average Hamming loss.
from sklearn.metrics import jaccard_score
jaccard_score(
y_true: ArrayLike,
y_pred: ArrayLike,
labels: ArrayLike | None = None,
pos_label: str | int = 1,
average: str | None = "binary",
sample_weight: ArrayLike | None = None,
zero_division: str | int = "warn"
) -> float | ArrayLike
Jaccard similarity coefficient score.
from sklearn.metrics import zero_one_loss
zero_one_loss(
y_true: ArrayLike,
y_pred: ArrayLike,
normalize: bool = True,
sample_weight: ArrayLike | None = None
) -> float
Zero-one classification loss.
from sklearn.metrics import matthews_corrcoef
matthews_corrcoef(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: ArrayLike | None = None
) -> float
Compute the Matthews correlation coefficient (MCC).
from sklearn.metrics import cohen_kappa_score
cohen_kappa_score(
y1: ArrayLike,
y2: ArrayLike,
labels: ArrayLike | None = None,
weights: str | None = None,
sample_weight: ArrayLike | None = None
) -> float
Compute Cohen's kappa: a statistic that measures inter-annotator agreement.
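Unlike raw accuracy, both MCC and Cohen's kappa correct for agreement that would occur by chance, which makes them more informative on imbalanced data. A sketch where a near-majority-vote predictor scores 90% accuracy but a much weaker MCC and kappa:

from sklearn.metrics import accuracy_score, matthews_corrcoef, cohen_kappa_score

y_true = [1, 1, 1, 1, 1, 1, 1, 1, 0, 0]
y_pred = [1, 1, 1, 1, 1, 1, 1, 1, 1, 0]
print(accuracy_score(y_true, y_pred))     # 0.9
print(matthews_corrcoef(y_true, y_pred))  # ≈ 0.67
print(cohen_kappa_score(y_true, y_pred))  # ≈ 0.62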
from sklearn.metrics import class_likelihood_ratios
class_likelihood_ratios(
y_true: ArrayLike,
y_pred: ArrayLike,
labels: ArrayLike | None = None,
sample_weight: ArrayLike | None = None
) -> tuple[float, float]
Compute binary classification positive and negative likelihood ratios.
from sklearn.metrics import det_curve
det_curve(
y_true: ArrayLike,
y_score: ArrayLike,
pos_label: int | str | None = None,
sample_weight: ArrayLike | None = None
) -> tuple[ArrayLike, ArrayLike, ArrayLike]
Compute error rates for different probability thresholds.
from sklearn.metrics import mean_squared_error
mean_squared_error(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: ArrayLike | None = None,
multioutput: str | ArrayLike = "uniform_average",
squared: bool = True
) -> float | ArrayLike
Mean squared error regression loss.
from sklearn.metrics import mean_absolute_error
mean_absolute_error(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: ArrayLike | None = None,
multioutput: str | ArrayLike = "uniform_average"
) -> float | ArrayLike
Mean absolute error regression loss.
from sklearn.metrics import mean_absolute_percentage_error
mean_absolute_percentage_error(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: ArrayLike | None = None,
multioutput: str | ArrayLike = "uniform_average"
) -> float | ArrayLike
Mean absolute percentage error (MAPE) regression loss.
from sklearn.metrics import median_absolute_error
median_absolute_error(
y_true: ArrayLike,
y_pred: ArrayLike,
multioutput: str | ArrayLike = "uniform_average",
sample_weight: ArrayLike | None = None
) -> float | ArrayLike
Median absolute error regression loss.
from sklearn.metrics import max_error
max_error(
y_true: ArrayLike,
y_pred: ArrayLike
) -> float
Compute the maximum residual error.
from sklearn.metrics import mean_squared_log_error
mean_squared_log_error(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: ArrayLike | None = None,
multioutput: str | ArrayLike = "uniform_average"
) -> float | ArrayLike
Mean squared logarithmic error regression loss.
from sklearn.metrics import mean_poisson_deviance
mean_poisson_deviance(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: ArrayLike | None = None
) -> float
Mean Poisson deviance regression loss.
from sklearn.metrics import mean_gamma_deviance
mean_gamma_deviance(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: ArrayLike | None = None
) -> float
Mean Gamma deviance regression loss.
from sklearn.metrics import mean_tweedie_deviance
mean_tweedie_deviance(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: ArrayLike | None = None,
power: float = 0
) -> float
Mean Tweedie deviance regression loss.
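Poisson and Gamma deviance are special cases of the Tweedie deviance with power=1 and power=2 respectively (power=0 recovers mean squared error); predictions must be strictly positive for power >= 1. A sketch verifying the correspondence on toy data:

from sklearn.metrics import (
    mean_poisson_deviance, mean_gamma_deviance, mean_tweedie_deviance
)

y_true = [2.0, 1.0, 3.0, 4.0]
y_pred = [1.5, 1.2, 2.5, 4.5]
assert mean_poisson_deviance(y_true, y_pred) == mean_tweedie_deviance(y_true, y_pred, power=1)
assert mean_gamma_deviance(y_true, y_pred) == mean_tweedie_deviance(y_true, y_pred, power=2)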
from sklearn.metrics import r2_score
r2_score(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: ArrayLike | None = None,
multioutput: str | ArrayLike | None = "uniform_average",
force_finite: bool = True
) -> float | ArrayLike
R² (coefficient of determination) regression score function.
from sklearn.metrics import explained_variance_score
explained_variance_score(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: ArrayLike | None = None,
multioutput: str | ArrayLike = "uniform_average",
force_finite: bool = True
) -> float | ArrayLike
Explained variance regression score function.
from sklearn.metrics import d2_absolute_error_score
d2_absolute_error_score(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: ArrayLike | None = None,
multioutput: str | ArrayLike = "uniform_average"
) -> float | ArrayLike
Compute the D² absolute error score.
from sklearn.metrics import d2_log_loss_score
d2_log_loss_score(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: ArrayLike | None = None,
labels: ArrayLike | None = None
) -> float
Compute the D² log loss score.
from sklearn.metrics import d2_pinball_score
d2_pinball_score(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: ArrayLike | None = None,
alpha: float = 0.5,
multioutput: str | ArrayLike = "uniform_average"
) -> float | ArrayLike
Compute the D² pinball score.
from sklearn.metrics import d2_tweedie_score
d2_tweedie_score(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: ArrayLike | None = None,
power: float = 0
) -> float
Compute the D² Tweedie score.
from sklearn.metrics import mean_pinball_loss
mean_pinball_loss(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: ArrayLike | None = None,
alpha: float = 0.5,
multioutput: str | ArrayLike = "uniform_average"
) -> float | ArrayLike
Pinball loss for quantile regression.
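The pinball loss penalizes under- and over-prediction asymmetrically according to alpha, which is why it is the natural loss for quantile regression. A sketch scoring hypothetical 90th-percentile forecasts:

from sklearn.metrics import mean_pinball_loss, d2_pinball_score

y_true = [1.0, 2.0, 3.0, 4.0]
y_q90 = [1.5, 2.5, 3.5, 4.5]  # hypothetical 90th-percentile predictions
print(mean_pinball_loss(y_true, y_q90, alpha=0.9))  # over-predictions cost (1 - alpha) per unit
print(d2_pinball_score(y_true, y_q90, alpha=0.9))   # 1.0 is perfect; 0.0 matches a constant-quantile baseline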
from sklearn.metrics import silhouette_score
silhouette_score(
X: ArrayLike,
labels: ArrayLike,
metric: str | Callable = "euclidean",
sample_size: int | None = None,
random_state: int | RandomState | None = None,
**kwds
) -> float
Compute the mean Silhouette Coefficient of all samples.
from sklearn.metrics import silhouette_samples
silhouette_samples(
X: ArrayLike,
labels: ArrayLike,
metric: str | Callable = "euclidean",
**kwds
) -> ArrayLike
Compute the Silhouette Coefficient for each sample.
from sklearn.metrics import calinski_harabasz_score
calinski_harabasz_score(
X: ArrayLike,
labels: ArrayLike
) -> float
Compute the Calinski and Harabasz score.
from sklearn.metrics import davies_bouldin_score
davies_bouldin_score(
X: ArrayLike,
labels: ArrayLike
) -> float
Compute the Davies-Bouldin score.
from sklearn.metrics import adjusted_rand_score
adjusted_rand_score(
labels_true: ArrayLike,
labels_pred: ArrayLike
) -> float
Rand index adjusted for chance.
from sklearn.metrics import rand_score
rand_score(
labels_true: ArrayLike,
labels_pred: ArrayLike
) -> float
Rand index.
from sklearn.metrics import adjusted_mutual_info_score
adjusted_mutual_info_score(
labels_true: ArrayLike,
labels_pred: ArrayLike,
average_method: str = "arithmetic"
) -> float
Adjusted Mutual Information between two clusterings.
from sklearn.metrics import normalized_mutual_info_score
normalized_mutual_info_score(
labels_true: ArrayLike,
labels_pred: ArrayLike,
average_method: str = "arithmetic"
) -> float
Normalized Mutual Information between two clusterings.
from sklearn.metrics import mutual_info_score
mutual_info_score(
labels_true: ArrayLike,
labels_pred: ArrayLike,
contingency: ArrayLike | None = None
) -> float
Mutual Information between two clusterings.
from sklearn.metrics import fowlkes_mallows_score
fowlkes_mallows_score(
labels_true: ArrayLike,
labels_pred: ArrayLike,
sparse: bool = False
) -> float
Measure the similarity of two clusterings of a set of points.
from sklearn.metrics import homogeneity_score
homogeneity_score(
labels_true: ArrayLike,
labels_pred: ArrayLike
) -> float
Homogeneity metric of a cluster labeling given a ground truth.
from sklearn.metrics import completeness_score
completeness_score(
labels_true: ArrayLike,
labels_pred: ArrayLike
) -> float
Compute completeness metric of a cluster labeling.
from sklearn.metrics import v_measure_score
v_measure_score(
labels_true: ArrayLike,
labels_pred: ArrayLike,
beta: float = 1.0
) -> float
V-measure cluster labeling given a ground truth.
from sklearn.metrics import homogeneity_completeness_v_measure
homogeneity_completeness_v_measure(
labels_true: ArrayLike,
labels_pred: ArrayLike,
beta: float = 1.0
) -> tuple[float, float, float]
Compute the homogeneity, completeness, and V-measure scores at once.
from sklearn.metrics import consensus_score
consensus_score(
a: tuple[ArrayLike, ArrayLike],
b: tuple[ArrayLike, ArrayLike],
similarity: str | Callable = "jaccard"
) -> float
The similarity of two sets of biclusters.
from sklearn.metrics import coverage_error
coverage_error(
y_true: ArrayLike,
y_score: ArrayLike,
sample_weight: ArrayLike | None = None
) -> float
Coverage error measure.
from sklearn.metrics import label_ranking_average_precision_score
label_ranking_average_precision_score(
y_true: ArrayLike,
y_score: ArrayLike,
sample_weight: ArrayLike | None = None
) -> float
Compute ranking-based average precision.
from sklearn.metrics import label_ranking_loss
label_ranking_loss(
y_true: ArrayLike,
y_score: ArrayLike,
sample_weight: ArrayLike | None = None
) -> float
Compute Ranking loss measure.
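These three multilabel ranking metrics all take a binary indicator matrix y_true and continuous scores y_score. A minimal sketch:

import numpy as np
from sklearn.metrics import (
    coverage_error, label_ranking_average_precision_score, label_ranking_loss
)

y_true = np.array([[1, 0, 0], [0, 0, 1]])
y_score = np.array([[0.75, 0.5, 1.0], [1.0, 0.2, 0.1]])
print(coverage_error(y_true, y_score))  # how far down the ranking the true labels sit
print(label_ranking_average_precision_score(y_true, y_score))
print(label_ranking_loss(y_true, y_score))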
from sklearn.metrics import pairwise_distances
pairwise_distances(
X: ArrayLike,
Y: ArrayLike | None = None,
metric: str | Callable = "euclidean",
n_jobs: int | None = None,
force_all_finite: bool | str = True,
**kwds
) -> ArrayLike
Compute the distance matrix from a vector array X and optional Y.
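pairwise_distances dispatches on the metric name (scikit-learn and scipy.spatial.distance metrics, or a callable) and returns an (n_samples_X, n_samples_Y) matrix; omitting Y computes self-distances. Sketch:

import numpy as np
from sklearn.metrics import pairwise_distances

X = np.array([[0.0, 0.0], [1.0, 0.0]])
Y = np.array([[0.0, 1.0]])
print(pairwise_distances(X, Y, metric="euclidean"))  # [[1.0], [sqrt(2)]]
print(pairwise_distances(X, metric="manhattan"))     # 2x2 self-distance matrix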
from sklearn.metrics import euclidean_distances
euclidean_distances(
X: ArrayLike,
Y: ArrayLike | None = None,
Y_norm_squared: ArrayLike | None = None,
squared: bool = False,
X_norm_squared: ArrayLike | None = None
) -> ArrayLike
Compute the distance matrix between each pair from a vector array X.
from sklearn.metrics import manhattan_distances
manhattan_distances(
X: ArrayLike,
Y: ArrayLike | None = None,
sum_over_features: bool = True
) -> ArrayLike
Compute the L1 distances between the vectors in X and Y.
from sklearn.metrics import cosine_distances
cosine_distances(
X: ArrayLike,
Y: ArrayLike | None = None
) -> ArrayLike
Compute cosine distance between samples in X and Y.
from sklearn.metrics import haversine_distances
haversine_distances(
X: ArrayLike,
Y: ArrayLike | None = None
) -> ArrayLike
Compute the Haversine distance between samples in X and Y.
from sklearn.metrics import cosine_similarity
cosine_similarity(
X: ArrayLike,
Y: ArrayLike | None = None,
dense_output: bool = True
) -> ArrayLike
Compute cosine similarity between samples in X and Y.
from sklearn.metrics import linear_kernel
linear_kernel(
X: ArrayLike,
Y: ArrayLike | None = None,
dense_output: bool = True
) -> ArrayLike
Compute the linear kernel between X and Y.
from sklearn.metrics import polynomial_kernel
polynomial_kernel(
X: ArrayLike,
Y: ArrayLike | None = None,
degree: int = 3,
gamma: float | None = None,
coef0: float = 1,
dense_output: bool = True
) -> ArrayLike
Compute the polynomial kernel between X and Y.
from sklearn.metrics import rbf_kernel
rbf_kernel(
X: ArrayLike,
Y: ArrayLike | None = None,
gamma: float | None = None,
dense_output: bool = True
) -> ArrayLike
Compute the RBF (Gaussian) kernel between X and Y.
from sklearn.metrics import sigmoid_kernel
sigmoid_kernel(
X: ArrayLike,
Y: ArrayLike | None = None,
gamma: float | None = None,
coef0: float = 1,
dense_output: bool = True
) -> ArrayLike
Compute the sigmoid kernel between X and Y.
from sklearn.metrics import laplacian_kernel
laplacian_kernel(
X: ArrayLike,
Y: ArrayLike | None = None,
gamma: float | None = None,
dense_output: bool = True
) -> ArrayLike
Compute the Laplacian kernel between X and Y.
from sklearn.metrics import chi2_kernel
chi2_kernel(
X: ArrayLike,
Y: ArrayLike | None = None,
gamma: float = 1.0,
dense_output: bool = True
) -> ArrayLike
Compute the exponential chi-squared kernel between X and Y.
from sklearn.metrics import additive_chi2_kernel
additive_chi2_kernel(
X: ArrayLike,
Y: ArrayLike | None = None
) -> ArrayLike
Compute the additive chi-squared kernel between observations in X and Y.
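All of the kernel functions above share the same call shape: they return an (n_samples_X, n_samples_Y) Gram matrix, and gamma defaults to 1/n_features where applicable. Sketch:

import numpy as np
from sklearn.metrics import linear_kernel, rbf_kernel, polynomial_kernel

X = np.array([[0.0, 1.0], [1.0, 0.0]])
print(linear_kernel(X))                # plain dot products
print(rbf_kernel(X, gamma=0.5))        # exp(-gamma * squared euclidean distance)
print(polynomial_kernel(X, degree=2))  # (gamma * <x, y> + coef0) ** degree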
from sklearn.metrics import DistanceMetric
DistanceMetric.get_metric(
metric: str,
**kwargs
) -> DistanceMetric
Uniform interface to fast distance metric functions; concrete metrics are obtained via the get_metric class method.
from sklearn.metrics import dcg_score
dcg_score(
y_true: ArrayLike,
y_score: ArrayLike,
k: int | None = None,
log_base: float = 2,
sample_weight: ArrayLike | None = None,
ignore_ties: bool = False
) -> float
Compute Discounted Cumulative Gain.
from sklearn.metrics import ndcg_score
ndcg_score(
y_true: ArrayLike,
y_score: ArrayLike,
k: int | None = None,
sample_weight: ArrayLike | None = None,
ignore_ties: bool = False
) -> float
Compute Normalized Discounted Cumulative Gain.
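DCG discounts each item's relevance by the log of its rank; NDCG divides by the DCG of the ideal ordering, so 1.0 means a perfect ranking. A sketch with graded relevances (values are illustrative):

import numpy as np
from sklearn.metrics import dcg_score, ndcg_score

true_relevance = np.array([[3, 2, 3, 0, 1]])      # graded relevance per item
scores = np.array([[0.1, 0.2, 0.35, 0.3, 0.25]])  # model ranking scores
print(dcg_score(true_relevance, scores, k=3))
print(ndcg_score(true_relevance, scores, k=3))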
from sklearn.metrics import make_scorer
make_scorer(
score_func: Callable,
greater_is_better: bool = True,
needs_proba: bool = False,
needs_threshold: bool = False,
**kwargs
) -> Callable
Make a scorer from a performance metric or loss function.
from sklearn.metrics import get_scorer
get_scorer(
scoring: str
) -> Callable
Get a scorer from string.
from sklearn.metrics import get_scorer_names
get_scorer_names() -> list[str]
Get the names of all available scorers.
from sklearn.metrics import check_scoring
check_scoring(
estimator: BaseEstimator,
scoring: str | Callable | None = None,
allow_none: bool = False
) -> Callable | None
Determine scorer from user options.
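Scorer strings, as accepted by cross_val_score and GridSearchCV, can be enumerated and resolved by name; scorers follow a greater-is-better convention, hence the neg_ prefixes on loss-based ones. Sketch:

from sklearn.metrics import get_scorer, get_scorer_names

print(sorted(get_scorer_names())[:5])           # first few registered scorer names
neg_mse = get_scorer("neg_mean_squared_error")  # callable(estimator, X, y)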
from sklearn.metrics import ConfusionMatrixDisplay
ConfusionMatrixDisplay(
confusion_matrix: ArrayLike,
display_labels: ArrayLike | None = None
)
Confusion Matrix visualization.
from sklearn.metrics import RocCurveDisplay
RocCurveDisplay(
fpr: ArrayLike,
tpr: ArrayLike,
roc_auc: float | None = None,
estimator_name: str | None = None,
pos_label: str | int | None = None
)
ROC Curve visualization.
from sklearn.metrics import PrecisionRecallDisplay
PrecisionRecallDisplay(
precision: ArrayLike,
recall: ArrayLike,
average_precision: float | None = None,
estimator_name: str | None = None,
pos_label: str | int | None = None
)
Precision-Recall visualization.
from sklearn.metrics import DetCurveDisplay
DetCurveDisplay(
fpr: ArrayLike,
fnr: ArrayLike,
estimator_name: str | None = None,
pos_label: str | int | None = None
)
DET curve visualization.
from sklearn.metrics import PredictionErrorDisplay
PredictionErrorDisplay(
y_true: ArrayLike,
y_pred: ArrayLike,
kind: str = "actual_vs_predicted",
subsample: float | int | None = None,
random_state: int | RandomState | None = None
)
Prediction error visualization.
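Each Display class also provides from_estimator and from_predictions constructors that compute the underlying quantities for you. A sketch assuming a hypothetical fitted classifier clf with test data X_test, y_test, plus hypothetical regression arrays y_reg_true, y_reg_pred:

import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay, PredictionErrorDisplay

ConfusionMatrixDisplay.from_estimator(clf, X_test, y_test)  # clf, X_test, y_test are hypothetical
plt.show()
PredictionErrorDisplay.from_predictions(y_true=y_reg_true, y_pred=y_reg_pred)
plt.show()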
# Classification metrics example
from sklearn.metrics import (
accuracy_score, precision_score, recall_score, f1_score,
confusion_matrix, classification_report, roc_auc_score
)
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
# Generate sample data
X, y = make_classification(n_samples=1000, n_classes=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Train model
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
# Make predictions
y_pred = clf.predict(X_test)
y_proba = clf.predict_proba(X_test)[:, 1]
# Calculate metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_proba)
print(f"Accuracy: {accuracy:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")
print(f"F1-score: {f1:.3f}")
print(f"ROC-AUC: {roc_auc:.3f}")
# Confusion matrix
cm = confusion_matrix(y_test, y_pred)
print(f"\nConfusion Matrix:\n{cm}")
# Classification report
report = classification_report(y_test, y_pred)
print(f"\nClassification Report:\n{report}")from sklearn.metrics import (
mean_squared_error, mean_absolute_error, r2_score,
explained_variance_score, max_error
)
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
# Generate sample data
X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Train model
reg = RandomForestRegressor(n_estimators=100, random_state=42)
reg.fit(X_train, y_train)
# Make predictions
y_pred = reg.predict(X_test)
# Calculate metrics
mse = mean_squared_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
ev = explained_variance_score(y_test, y_pred)
max_err = max_error(y_test, y_pred)
print(f"MSE: {mse:.3f}")
print(f"RMSE: {rmse:.3f}")
print(f"MAE: {mae:.3f}")
print(f"R² Score: {r2:.3f}")
print(f"Explained Variance: {ev:.3f}")
print(f"Max Error: {max_err:.3f}")from sklearn.metrics import (
silhouette_score, calinski_harabasz_score, davies_bouldin_score,
adjusted_rand_score, adjusted_mutual_info_score
)
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
# Generate sample data
X, y_true = make_blobs(n_samples=300, centers=4, n_features=2,
                       random_state=42, cluster_std=0.8)
# Perform clustering
kmeans = KMeans(n_clusters=4, random_state=42)
y_pred = kmeans.fit_predict(X)
# Internal validation metrics
silhouette = silhouette_score(X, y_pred)
calinski_harabasz = calinski_harabasz_score(X, y_pred)
davies_bouldin = davies_bouldin_score(X, y_pred)
print(f"Silhouette Score: {silhouette:.3f}")
print(f"Calinski-Harabasz Score: {calinski_harabasz:.3f}")
print(f"Davies-Bouldin Score: {davies_bouldin:.3f}")
# External validation metrics (when true labels are available)
ari = adjusted_rand_score(y_true, y_pred)
ami = adjusted_mutual_info_score(y_true, y_pred)
print(f"Adjusted Rand Index: {ari:.3f}")
print(f"Adjusted Mutual Information: {ami:.3f}")from sklearn.metrics import roc_curve, RocCurveDisplay
import matplotlib.pyplot as plt
# Calculate ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_proba)
# Create ROC curve display
roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc)
roc_display.plot()
plt.title('ROC Curve')
plt.show()
# Or create directly from estimator
RocCurveDisplay.from_estimator(clf, X_test, y_test)
plt.show()

# Custom scorer example
from sklearn.metrics import make_scorer
import numpy as np
# Define custom scoring function
def custom_accuracy(y_true, y_pred):
    """Toy metric: plain accuracy scaled by a constant factor."""
    return np.mean(y_true == y_pred) * 1.1  # the 1.1 factor is purely illustrative
# Create scorer
custom_scorer = make_scorer(custom_accuracy, greater_is_better=True)
# Use in cross-validation or grid search
from sklearn.model_selection import cross_val_score
scores = cross_val_score(clf, X, y, cv=5, scoring=custom_scorer)
print(f"Custom scores: {scores}")Install with Tessl CLI
npx tessl i tessl/pypi-scikit-learndocs