CatBoost is a fast, scalable, high performance gradient boosting on decision trees library used for ranking, classification, regression and other ML tasks.
—
Quality: Pending — does it follow best practices?
Impact: Pending — no eval scenarios have been run.
CatBoost provides a comprehensive metrics framework for evaluating model performance across various machine learning tasks. The framework includes built-in metrics for classification, regression, and ranking, with dynamic class generation for metric-specific functionality.
Core base class and infrastructure for all CatBoost metrics.
class BuiltinMetric:
    """
    Base class for all CatBoost built-in metrics.

    Provides the common interface for metric evaluation, parameter
    validation, and configuration management shared by all metric
    types in CatBoost.

    NOTE(review): this is an API documentation stub — method bodies are
    intentionally empty; the real implementations are generated from the
    underlying C++ library.
    """

    @staticmethod
    def params_with_defaults():
        """
        Get valid metric parameters with their default values.

        Returns:
            dict: Parameter names mapped to default values and mandatory flags:
                - 'default_value': Default parameter value or None
                - 'is_mandatory': Whether the parameter is required (bool)
        """

    def __str__(self):
        """
        Get string representation of the metric with parameters.

        Returns:
            str: Metric string representation
        """

    def set_hints(self, **hints):
        """
        Set hints for metric calculation (not validated).

        Parameters:
            **hints: Arbitrary hint parameters for metric behavior

        Returns:
            self: For method chaining
        """

    def eval(self, label, approx, weight=None, group_id=None,
             group_weight=None, subgroup_id=None, pairs=None,
             thread_count=-1):
        """
        Evaluate metric with raw predictions and labels.

        Parameters:
            label: True target values (array-like)
            approx: Model predictions (array-like)
            weight: Sample weights (array-like, optional)
            group_id: Group identifiers for ranking (array-like, optional)
            group_weight: Group weights (array-like, optional)
            subgroup_id: Subgroup identifiers (array-like, optional)
            pairs: Pairwise constraints for ranking (array-like or path, optional)
            thread_count: Number of threads for computation (int)

        Returns:
            float: Metric value
        """

    def is_max_optimal(self):
        """
        Check if higher metric values indicate better performance.

        Returns:
            bool: True if metric should be maximized, False if minimized
        """

    def is_min_optimal(self):
        """
        Check if lower metric values indicate better performance.

        Returns:
            bool: True if metric should be minimized, False if maximized
        """


# CatBoost dynamically generates metric classes based on the underlying C++
# implementation. Each metric type has specific variants with different
# parameter configurations.
# Classification Metrics (examples of dynamically generated classes)
class Logloss(BuiltinMetric):
    """Logarithmic loss for binary and multi-class classification."""

class CrossEntropy(BuiltinMetric):
    """Cross-entropy loss for classification tasks."""

class MultiClass(BuiltinMetric):
    """Multi-class classification accuracy."""

class Accuracy(BuiltinMetric):
    """Classification accuracy metric."""

class Precision(BuiltinMetric):
    """Precision metric for classification."""

class Recall(BuiltinMetric):
    """Recall metric for classification."""

class F1(BuiltinMetric):
    """F1-score metric for classification."""

class AUC(BuiltinMetric):
    """Area Under the ROC Curve metric."""

# Regression Metrics
class RMSE(BuiltinMetric):
    """Root Mean Squared Error for regression."""

class MAE(BuiltinMetric):
    """Mean Absolute Error for regression."""

class MAPE(BuiltinMetric):
    """Mean Absolute Percentage Error for regression."""

class R2(BuiltinMetric):
    """R-squared coefficient of determination."""

class MSLE(BuiltinMetric):
    """Mean Squared Logarithmic Error for regression."""

# Ranking Metrics
class NDCG(BuiltinMetric):
    """Normalized Discounted Cumulative Gain for ranking."""

class DCG(BuiltinMetric):
    """Discounted Cumulative Gain for ranking."""

class MAP(BuiltinMetric):
    """Mean Average Precision for ranking."""

class MRR(BuiltinMetric):
    """Mean Reciprocal Rank for ranking."""

class ERR(BuiltinMetric):
    """Expected Reciprocal Rank for ranking."""


from catboost import metrics
import numpy as np

# Create sample data: binary labels with raw probability predictions.
y_true = np.array([0, 1, 1, 0, 1])
y_pred = np.array([0.1, 0.8, 0.7, 0.3, 0.9])

# Initialize and evaluate classification metrics.
logloss = metrics.Logloss()
accuracy = metrics.Accuracy()
auc = metrics.AUC()

# Evaluate metrics (Accuracy gets thresholded predictions).
logloss_value = logloss.eval(y_true, y_pred)
accuracy_value = accuracy.eval(y_true, y_pred > 0.5)
auc_value = auc.eval(y_true, y_pred)

print(f"LogLoss: {logloss_value:.4f}")
print(f"Accuracy: {accuracy_value:.4f}")
print(f"AUC: {auc_value:.4f}")

# Check optimization direction for each metric.
print(f"LogLoss should be minimized: {logloss.is_min_optimal()}")
print(f"AUC should be maximized: {auc.is_max_optimal()}")

from catboost import metrics
import numpy as np

# Sample regression data: true targets and model predictions.
y_true = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
y_pred = np.array([1.1, 2.2, 2.8, 4.2, 4.8])

# Initialize regression metrics.
rmse = metrics.RMSE()
mae = metrics.MAE()
r2 = metrics.R2()

# Evaluate metrics.
rmse_value = rmse.eval(y_true, y_pred)
mae_value = mae.eval(y_true, y_pred)
r2_value = r2.eval(y_true, y_pred)

print(f"RMSE: {rmse_value:.4f}")
print(f"MAE: {mae_value:.4f}")
print(f"R²: {r2_value:.4f}")

# Get metric parameters
print(f"RMSE parameters: {rmse.params_with_defaults()}")

from catboost import metrics
import numpy as np

# Sample ranking data: per-document relevance, predictions, and query groups.
y_true = np.array([2, 1, 0, 3, 1, 2])  # Relevance scores
y_pred = np.array([0.8, 0.6, 0.3, 0.9, 0.5, 0.7])  # Predictions
group_ids = np.array([0, 0, 0, 1, 1, 1])  # Query groups

# Initialize ranking metrics.
ndcg = metrics.NDCG()
dcg = metrics.DCG()
map_metric = metrics.MAP()

# Ranking metrics need group_id so scores are computed per query group.
ndcg_value = ndcg.eval(y_true, y_pred, group_id=group_ids)
dcg_value = dcg.eval(y_true, y_pred, group_id=group_ids)
map_value = map_metric.eval(y_true, y_pred, group_id=group_ids)

print(f"NDCG: {ndcg_value:.4f}")
print(f"DCG: {dcg_value:.4f}")
print(f"MAP: {map_value:.4f}")

from catboost import metrics
import numpy as np

# Data with per-sample weights.
y_true = np.array([0, 1, 1, 0, 1])
y_pred = np.array([0.1, 0.8, 0.7, 0.3, 0.9])
weights = np.array([1.0, 2.0, 1.5, 1.0, 2.5])  # Sample importance

# Initialize metrics.
logloss = metrics.Logloss()
precision = metrics.Precision()

# Evaluate with weights (Precision gets thresholded predictions).
weighted_logloss = logloss.eval(y_true, y_pred, weight=weights)
weighted_precision = precision.eval(y_true, y_pred > 0.5, weight=weights)

print(f"Weighted LogLoss: {weighted_logloss:.4f}")
print(f"Weighted Precision: {weighted_precision:.4f}")

from catboost import metrics
# Initialize metric with specific parameters
# (Parameter availability depends on metric type)
auc_metric = metrics.AUC()
f1_metric = metrics.F1()

# Set hints for metric behavior (hints are not validated).
auc_metric.set_hints(skip_train=True)
f1_metric.set_hints(use_weights=True)

# Get string representation with parameters.
print(f"AUC metric: {auc_metric}")
print(f"F1 metric: {f1_metric}")

# Check available parameters
print(f"AUC parameters: {auc_metric.params_with_defaults()}")

from catboost import metrics
import numpy as np

# Large dataset simulation (seeded for reproducibility).
np.random.seed(42)
n_samples = 100000
y_true = np.random.randint(0, 2, n_samples)
y_pred = np.random.random(n_samples)

# Initialize metric.
auc = metrics.AUC()

# Evaluate with multiple threads for large datasets.
auc_value = auc.eval(y_true, y_pred, thread_count=4)
print(f"AUC (4 threads): {auc_value:.6f}")

# Compare with single-threaded evaluation.
auc_single = auc.eval(y_true, y_pred, thread_count=1)
print(f"AUC (1 thread): {auc_single:.6f}")

# The metrics framework integrates seamlessly with CatBoost model training
# and evaluation:
from catboost import CatBoostClassifier, metrics
import numpy as np

# NOTE(review): X_train/y_train/X_test/y_test are assumed to be defined by
# the caller — this snippet shows integration only.

# Create model with custom metric
model = CatBoostClassifier(
    iterations=100,
    eval_metric='AUC',  # Use built-in metric name
    verbose=False
)

# Train model
model.fit(X_train, y_train, eval_set=(X_test, y_test))

# Manual metric evaluation on positive-class probabilities.
auc_metric = metrics.AUC()
predictions = model.predict_proba(X_test)[:, 1]
manual_auc = auc_metric.eval(y_test, predictions)
print(f"Manual AUC calculation: {manual_auc:.6f}")

# Compare with model's built-in evaluation (last recorded eval value).
model_metrics = model.get_evals_result()
print(f"Model's AUC: {model_metrics['validation']['AUC'][-1]:.6f}")

# The CatBoost metrics framework provides extensive coverage across machine
# learning tasks:
Each metric type may have multiple variants with different default parameters, all dynamically generated from the underlying CatBoost implementation.
Install with Tessl CLI
npx tessl i tessl/pypi-catboost