XGBoost is an optimized distributed gradient boosting library designed to be highly efficient, flexible, and portable.
XGBoost provides a comprehensive callback system for monitoring and controlling the training process. Callbacks allow custom logic to be executed at different stages of training, including early stopping, learning rate scheduling, evaluation monitoring, and model checkpointing.

TrainingCallback is the abstract base class for creating custom training callbacks.

class TrainingCallback:
    def before_training(self, model):
        """
        Called before training starts.
        Parameters:
        - model: The model instance
        Returns:
        model: The model instance (possibly modified)
        """

    def after_training(self, model):
        """
        Called after training completes.
        Parameters:
        - model: The trained model instance
        Returns:
        model: The model instance (possibly modified)
        """

    def before_iteration(self, model, epoch, evals_log):
        """
        Called before each training iteration.
        Parameters:
        - model: Current model instance
        - epoch: Current epoch number
        - evals_log: Evaluation results log
        Returns:
        bool: False to continue training, True to stop training
        """

    def after_iteration(self, model, epoch, evals_log):
        """
        Called after each training iteration.
        Parameters:
        - model: Current model instance
        - epoch: Current epoch number
        - evals_log: Evaluation results log
        Returns:
        bool: False to continue training, True to stop training
        """

CallbackContainer is a container class for managing multiple callbacks during training.

class CallbackContainer:
    def __init__(self, callbacks, metric=None, is_maximize=False):
        """
        Container for managing training callbacks.
        Parameters:
        - callbacks: List of TrainingCallback objects
        - metric: Primary evaluation metric name
        - is_maximize: Whether to maximize the metric (True) or minimize it (False)
        """

    def before_training(self, model):
        """Execute before_training for all callbacks."""

    def after_training(self, model):
        """Execute after_training for all callbacks."""

    def before_iteration(self, model, epoch, evals_log):
        """Execute before_iteration for all callbacks."""

    def after_iteration(self, model, epoch, evals_log):
        """Execute after_iteration for all callbacks."""

EarlyStopping is a callback that stops training when the evaluation metric stops improving.

class EarlyStopping(TrainingCallback):
    def __init__(
        self,
        rounds,
        metric_name=None,
        data_name=None,
        maximize=False,
        save_best=False,
        min_delta=0.0
    ):
        """
        Early stopping callback.
        Parameters:
        - rounds: Number of rounds to wait for improvement
        - metric_name: Name of the metric to monitor
        - data_name: Name of the dataset to monitor
        - maximize: Whether to maximize the metric (True) or minimize it (False)
        - save_best: Whether to save the best model
        - min_delta: Minimum change to qualify as an improvement
        """

LearningRateScheduler is a callback for scheduling learning rate changes during training.

class LearningRateScheduler(TrainingCallback):
    def __init__(self, learning_rates):
        """
        Learning rate scheduler callback.
        Parameters:
        - learning_rates: Sequence of learning rates (one per boosting round),
          or a callable that takes the epoch and returns a learning rate
        """

EvaluationMonitor is a callback for monitoring and logging evaluation metrics during training.

class EvaluationMonitor(TrainingCallback):
    def __init__(
        self,
        rank=0,
        period=1,
        show_stdv=True
    ):
        """
        Evaluation monitoring callback.
        Parameters:
        - rank: Process rank for distributed training
        - period: Frequency of logging (every N epochs)
        - show_stdv: Whether to show the standard deviation in cross-validation output
        """

TrainingCheckPoint is a callback for saving model checkpoints during training.

class TrainingCheckPoint(TrainingCallback):
    def __init__(
        self,
        directory,
        name="model",
        as_pickle=False,
        interval=1
    ):
        """
        Training checkpoint callback.
        Parameters:
        - directory: Directory to save checkpoints in
        - name: Base name for checkpoint files
        - as_pickle: Whether to save as a pickle (True) or in the XGBoost format (False)
        - interval: Checkpoint interval (every N epochs)
        """

Basic early stopping example:

import xgboost as xgb
from xgboost.callback import EarlyStopping

# Create the early stopping callback
early_stop = EarlyStopping(
    rounds=10,
    metric_name='rmse',
    data_name='eval',
    maximize=False,
    save_best=True
)

# Train with early stopping (X_train, y_train, X_eval, y_eval, and params
# are assumed to be defined)
dtrain = xgb.DMatrix(X_train, label=y_train)
deval = xgb.DMatrix(X_eval, label=y_eval)

model = xgb.train(
    params,
    dtrain,
    num_boost_round=1000,
    evals=[(deval, 'eval')],
    callbacks=[early_stop],
    verbose_eval=False
)
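
When early stopping fires, the returned booster records where training stopped; with save_best=True the model is already trimmed to the best round. A short follow-up using the Booster's best_iteration and best_score attributes:

# Inspect where training stopped
print(model.best_iteration, model.best_score)

# Predict using only the trees up to the best iteration
preds = model.predict(deval, iteration_range=(0, model.best_iteration + 1))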

Learning rate scheduling example:

from xgboost.callback import LearningRateScheduler

# Define a stepwise learning rate schedule
def lr_schedule(epoch):
    if epoch < 50:
        return 0.1
    elif epoch < 100:
        return 0.05
    else:
        return 0.01

# Create the scheduler callback
lr_scheduler = LearningRateScheduler(lr_schedule)

# Train with learning rate scheduling
model = xgb.train(
    params,
    dtrain,
    num_boost_round=150,
    callbacks=[lr_scheduler]
)

Combining multiple callbacks:

from xgboost.callback import (
    EarlyStopping,
    EvaluationMonitor,
    TrainingCheckPoint
)

# Create multiple callbacks
callbacks = [
    EarlyStopping(rounds=10, save_best=True),
    EvaluationMonitor(period=10),
    TrainingCheckPoint(directory='./checkpoints', interval=50)
]

# Train with all of them
model = xgb.train(
    params,
    dtrain,
    num_boost_round=1000,
    evals=[(deval, 'eval')],
    callbacks=callbacks
)
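
Checkpoints written by TrainingCheckPoint can be loaded back into a fresh Booster. The exact file names depend on the XGBoost version, so list the checkpoint directory first (the file name below is illustrative):

import os

# See which checkpoint files were written
print(os.listdir('./checkpoints'))

# Load one back (file name is illustrative)
booster = xgb.Booster()
booster.load_model('./checkpoints/model_50.json')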

Writing a custom callback:

from xgboost.callback import TrainingCallback

class CustomLoggingCallback(TrainingCallback):
    def __init__(self, log_file):
        super().__init__()
        self.log_file = log_file

    def before_training(self, model):
        with open(self.log_file, 'w') as f:
            f.write("Training started\n")
        return model

    def after_iteration(self, model, epoch, evals_log):
        # evals_log is keyed by dataset name, then metric name
        if evals_log and 'eval' in evals_log:
            metric_value = evals_log['eval']['rmse'][-1]
            with open(self.log_file, 'a') as f:
                f.write(f"Epoch {epoch}: RMSE = {metric_value}\n")
        return False  # continue training

# Use the custom callback
custom_logger = CustomLoggingCallback('training.log')

model = xgb.train(
    params,
    dtrain,
    num_boost_round=100,
    evals=[(deval, 'eval')],
    callbacks=[custom_logger]
)
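
Returning True from after_iteration halts training, which makes custom stopping rules straightforward. A sketch of a wall-clock budget callback (the class name and budget are illustrative):

import time

from xgboost.callback import TrainingCallback

class TimeBudget(TrainingCallback):
    """Illustrative callback: stop once a wall-clock budget is spent."""

    def __init__(self, budget_seconds):
        super().__init__()
        self.budget_seconds = budget_seconds
        self.start = None

    def before_training(self, model):
        self.start = time.time()
        return model

    def after_iteration(self, model, epoch, evals_log):
        # True stops training; False keeps going
        return (time.time() - self.start) > self.budget_seconds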

Using callbacks with the scikit-learn interface:

from xgboost import XGBRegressor
from xgboost.callback import EarlyStopping

# Create the callback
early_stop = EarlyStopping(rounds=10)

# Pass it to the sklearn-style estimator. Setting early_stopping_rounds=10
# in the constructor is a simpler alternative to the callback; use one or
# the other, not both.
model = XGBRegressor(
    n_estimators=1000,
    callbacks=[early_stop]
)

model.fit(
    X_train, y_train,
    eval_set=[(X_eval, y_eval)],
    verbose=False
)

Install with Tessl CLI:

npx tessl i tessl/pypi-xgboost