XGBoost is an optimized distributed gradient boosting library designed to be highly efficient, flexible, and portable.
XGBoost provides a comprehensive callback system for monitoring and controlling the training process. Callbacks allow custom logic to be executed at different stages of training, including early stopping, learning rate scheduling, evaluation monitoring, and model checkpointing.

TrainingCallback is the abstract base class for creating custom training callbacks.

class TrainingCallback:
    def before_training(self, model):
        """
        Called before training starts.
        Parameters:
        - model: The model instance
        Returns:
        model: The model instance (possibly modified)
        """

    def after_training(self, model):
        """
        Called after training completes.
        Parameters:
        - model: The trained model instance
        Returns:
        model: The model instance (possibly modified)
        """

    def before_iteration(self, model, epoch, evals_log):
        """
        Called before each training iteration.
        Parameters:
        - model: Current model instance
        - epoch: Current epoch number
        - evals_log: Evaluation results log
        Returns:
        bool: False to continue training, True to stop training
        """

    def after_iteration(self, model, epoch, evals_log):
        """
        Called after each training iteration.
        Parameters:
        - model: Current model instance
        - epoch: Current epoch number
        - evals_log: Evaluation results log
        Returns:
        bool: False to continue training, True to stop training
        """

CallbackContainer is a container class for managing multiple callbacks during training.

class CallbackContainer:
    def __init__(self, callbacks, metric=None, is_maximize=False):
        """
        Container for managing training callbacks.
        Parameters:
        - callbacks: List of TrainingCallback objects
        - metric: Primary evaluation metric name
        - is_maximize: Whether to maximize the metric (True) or minimize it (False)
        """

    def before_training(self, model):
        """Execute before_training for all callbacks."""

    def after_training(self, model):
        """Execute after_training for all callbacks."""

    def before_iteration(self, model, epoch, evals_log):
        """Execute before_iteration for all callbacks."""

    def after_iteration(self, model, epoch, evals_log):
        """Execute after_iteration for all callbacks."""

EarlyStopping is a callback that stops training when the evaluation metric stops improving.

class EarlyStopping(TrainingCallback):
    def __init__(
        self,
        rounds,
        metric_name=None,
        data_name=None,
        maximize=False,
        save_best=False,
        min_delta=0.0
    ):
        """
        Early stopping callback.
        Parameters:
        - rounds: Number of rounds to wait for improvement
        - metric_name: Name of the metric to monitor
        - data_name: Name of the dataset to monitor
        - maximize: Whether to maximize the metric (True) or minimize it (False)
        - save_best: Whether to save the best model
        - min_delta: Minimum change to qualify as an improvement
        """

LearningRateScheduler is a callback for scheduling learning rate changes during training.

class LearningRateScheduler(TrainingCallback):
    def __init__(self, learning_rates):
        """
        Learning rate scheduler callback.
        Parameters:
        - learning_rates: Sequence of learning rates (one per boosting round),
          or a callable that takes the epoch and returns a learning rate
        """

EvaluationMonitor is a callback for monitoring and logging evaluation metrics during training.

class EvaluationMonitor(TrainingCallback):
    def __init__(
        self,
        rank=0,
        period=1,
        show_stdv=True
    ):
        """
        Evaluation monitoring callback.
        Parameters:
        - rank: Process rank for distributed training
        - period: Frequency of logging (every N epochs)
        - show_stdv: Whether to show the standard deviation in cross-validation output
        """

TrainingCheckPoint is a callback for saving model checkpoints during training.

class TrainingCheckPoint(TrainingCallback):
    def __init__(
        self,
        directory,
        name="model",
        as_pickle=False,
        interval=1
    ):
        """
        Training checkpoint callback.
        Parameters:
        - directory: Directory to save checkpoints in
        - name: Base name for checkpoint files
        - as_pickle: Whether to save as a pickle (True) or in the XGBoost format (False)
        - interval: Checkpoint interval (every N epochs)
        """

Basic early stopping example:

import xgboost as xgb
from xgboost.callback import EarlyStopping

# Create the early stopping callback
early_stop = EarlyStopping(
    rounds=10,
    metric_name='rmse',
    data_name='eval',
    maximize=False,
    save_best=True
)

# Train with early stopping (X_train, y_train, X_eval, y_eval, and params
# are assumed to be defined)
dtrain = xgb.DMatrix(X_train, label=y_train)
deval = xgb.DMatrix(X_eval, label=y_eval)

model = xgb.train(
    params,
    dtrain,
    num_boost_round=1000,
    evals=[(deval, 'eval')],
    callbacks=[early_stop],
    verbose_eval=False
)
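
When early stopping fires, the returned booster records where training stopped; with save_best=True the model is already trimmed to the best round. A short follow-up using the Booster's best_iteration and best_score attributes:

# Inspect where training stopped
print(model.best_iteration, model.best_score)

# Predict using only the trees up to the best iteration
preds = model.predict(deval, iteration_range=(0, model.best_iteration + 1))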

Learning rate scheduling example:

from xgboost.callback import LearningRateScheduler

# Define a stepwise learning rate schedule
def lr_schedule(epoch):
    if epoch < 50:
        return 0.1
    elif epoch < 100:
        return 0.05
    else:
        return 0.01

# Create the scheduler callback
lr_scheduler = LearningRateScheduler(lr_schedule)

# Train with learning rate scheduling
model = xgb.train(
    params,
    dtrain,
    num_boost_round=150,
    callbacks=[lr_scheduler]
)

Combining multiple callbacks:

from xgboost.callback import (
    EarlyStopping,
    EvaluationMonitor,
    TrainingCheckPoint
)

# Create multiple callbacks
callbacks = [
    EarlyStopping(rounds=10, save_best=True),
    EvaluationMonitor(period=10),
    TrainingCheckPoint(directory='./checkpoints', interval=50)
]

# Train with all of them
model = xgb.train(
    params,
    dtrain,
    num_boost_round=1000,
    evals=[(deval, 'eval')],
    callbacks=callbacks
)
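
Checkpoints written by TrainingCheckPoint can be loaded back into a fresh Booster. The exact file names depend on the XGBoost version, so list the checkpoint directory first (the file name below is illustrative):

import os

# See which checkpoint files were written
print(os.listdir('./checkpoints'))

# Load one back (file name is illustrative)
booster = xgb.Booster()
booster.load_model('./checkpoints/model_50.json')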

Writing a custom callback:

from xgboost.callback import TrainingCallback

class CustomLoggingCallback(TrainingCallback):
    def __init__(self, log_file):
        super().__init__()
        self.log_file = log_file

    def before_training(self, model):
        with open(self.log_file, 'w') as f:
            f.write("Training started\n")
        return model

    def after_iteration(self, model, epoch, evals_log):
        # evals_log is keyed by dataset name, then metric name
        if evals_log and 'eval' in evals_log:
            metric_value = evals_log['eval']['rmse'][-1]
            with open(self.log_file, 'a') as f:
                f.write(f"Epoch {epoch}: RMSE = {metric_value}\n")
        return False  # continue training

# Use the custom callback
custom_logger = CustomLoggingCallback('training.log')

model = xgb.train(
    params,
    dtrain,
    num_boost_round=100,
    evals=[(deval, 'eval')],
    callbacks=[custom_logger]
)
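
Returning True from after_iteration halts training, which makes custom stopping rules straightforward. A sketch of a wall-clock budget callback (the class name and budget are illustrative):

import time

from xgboost.callback import TrainingCallback

class TimeBudget(TrainingCallback):
    """Illustrative callback: stop once a wall-clock budget is spent."""

    def __init__(self, budget_seconds):
        super().__init__()
        self.budget_seconds = budget_seconds
        self.start = None

    def before_training(self, model):
        self.start = time.time()
        return model

    def after_iteration(self, model, epoch, evals_log):
        # True stops training; False keeps going
        return (time.time() - self.start) > self.budget_seconds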

Using callbacks with the scikit-learn interface:

from xgboost import XGBRegressor
from xgboost.callback import EarlyStopping

# Create the callback
early_stop = EarlyStopping(rounds=10)

# Pass it to the sklearn-style estimator. Setting early_stopping_rounds=10
# in the constructor is a simpler alternative to the callback; use one or
# the other, not both.
model = XGBRegressor(
    n_estimators=1000,
    callbacks=[early_stop]
)

model.fit(
    X_train, y_train,
    eval_set=[(X_eval, y_eval)],
    verbose=False
)

Install with Tessl CLI:

npx tessl i tessl/pypi-xgboost