XGBoost is an optimized distributed gradient boosting library designed to be highly efficient, flexible, and portable.
XGBoost provides scikit-learn compatible estimators that follow sklearn conventions for seamless integration with existing ML pipelines. These estimators provide familiar fit/predict interfaces while leveraging XGBoost's powerful gradient boosting algorithms.
Base class for all XGBoost sklearn-compatible estimators.
class XGBModel:
    """Base class shared by all XGBoost scikit-learn compatible estimators.

    NOTE(review): this is an API stub — methods carry only docstrings and
    document the public contract of the real implementation; they have no
    executable bodies.
    """

    def __init__(
        self,
        n_estimators=100,
        max_depth=None,
        max_leaves=None,
        max_bin=None,
        grow_policy=None,
        learning_rate=None,
        verbosity=None,
        objective=None,
        booster=None,
        tree_method=None,
        n_jobs=None,
        gamma=None,
        min_child_weight=None,
        max_delta_step=None,
        subsample=None,
        sampling_method=None,
        colsample_bytree=None,
        colsample_bylevel=None,
        colsample_bynode=None,
        reg_alpha=None,
        reg_lambda=None,
        scale_pos_weight=None,
        base_score=None,
        random_state=None,
        missing=None,
        num_parallel_tree=None,
        monotone_constraints=None,
        interaction_constraints=None,
        importance_type='gain',
        device=None,
        validate_parameters=None,
        enable_categorical=False,
        feature_types=None,
        max_cat_to_onehot=None,
        max_cat_threshold=None,
        multi_strategy=None,
        eval_metric=None,
        early_stopping_rounds=None,
        callbacks=None,
        **kwargs
    ):
        """
        Base XGBoost sklearn-compatible estimator.

        Parameters:
        - n_estimators: Number of boosting rounds
        - max_depth: Maximum tree depth
        - max_leaves: Maximum number of leaves per tree
        - max_bin: Maximum number of histogram bins
        - grow_policy: Tree growing policy ('depthwise' or 'lossguide')
        - learning_rate: Boosting learning rate (eta)
        - verbosity: Verbosity of printed messages
        - objective: Learning objective
        - booster: Booster type ('gbtree', 'gblinear', 'dart')
        - tree_method: Tree construction algorithm
        - n_jobs: Number of parallel threads
        - gamma: Minimum loss reduction for split
        - min_child_weight: Minimum sum of instance weight in child
        - max_delta_step: Maximum delta step allowed for each leaf output
        - subsample: Subsample ratio of training instances
        - sampling_method: Sampling method for training instances
        - colsample_bytree / colsample_bylevel / colsample_bynode:
          Column subsample ratio per tree / per level / per node
        - reg_alpha: L1 regularization term
        - reg_lambda: L2 regularization term
        - scale_pos_weight: Balancing weight for the positive class
        - base_score: Initial prediction score for all instances
        - random_state: Random seed
        - missing: Value in the input data to be treated as missing
        - num_parallel_tree: Number of trees grown per boosting round
        - monotone_constraints: Monotonicity constraints per feature
        - interaction_constraints: Allowed feature interaction groups
        - importance_type: Metric used by feature_importances_
        - device: Device to run on (e.g. 'cpu', 'cuda')
        - validate_parameters: Validate parameter names/values
        - enable_categorical: Enable categorical feature support
        - feature_types: Per-feature type hints
        - max_cat_to_onehot / max_cat_threshold: Categorical split controls
        - multi_strategy: Strategy for multi-target models
        - eval_metric: Evaluation metric(s)
        - early_stopping_rounds: Early stopping rounds
        - callbacks: Callback functions
        - kwargs: Additional parameters passed through to the booster
        """

    def fit(
        self,
        X,
        y,
        sample_weight=None,
        base_margin=None,
        eval_set=None,
        eval_metric=None,
        early_stopping_rounds=None,
        verbose=True,
        xgb_model=None,
        sample_weight_eval_set=None,
        base_margin_eval_set=None,
        feature_weights=None,
        callbacks=None
    ):
        """
        Fit the model to training data.

        Parameters:
        - X: Training features
        - y: Training labels
        - sample_weight: Sample weights
        - base_margin: Base margin for each sample
        - eval_set: Evaluation sets as list of (X, y) tuples
        - eval_metric: Evaluation metric(s)
        - early_stopping_rounds: Early stopping rounds
        - verbose: Verbosity of evaluation output
        - xgb_model: Existing model to continue training from
        - sample_weight_eval_set: Sample weights for eval sets
        - base_margin_eval_set: Base margins for eval sets
        - feature_weights: Feature weights for column sampling
        - callbacks: Callback functions

        Returns:
        Self
        """

    def predict(
        self,
        X,
        output_margin=False,
        validate_features=True,
        base_margin=None,
        iteration_range=None
    ):
        """
        Make predictions on input data.

        Parameters:
        - X: Input features
        - output_margin: Output raw (untransformed) margins
        - validate_features: Validate feature names/types
        - base_margin: Base margin for each sample
        - iteration_range: Range of boosting rounds used for prediction

        Returns:
        Predictions as numpy array
        """

    def get_booster(self):
        """Get the underlying Booster object."""

    def save_model(self, fname):
        """Save the model to file."""

    def load_model(self, fname):
        """Load a model from file."""

    @property
    def feature_importances_(self):
        """Feature importances as a numpy array (metric set by importance_type)."""

    def get_params(self, deep=True):
        """Get estimator parameters (sklearn convention)."""

    def set_params(self, **params):
        """Set estimator parameters (sklearn convention)."""


# XGBoost regressor for continuous target variables.
class XGBRegressor(XGBModel):
    """XGBoost regressor for continuous target variables.

    Inherits all constructor parameters from XGBModel.
    Default objective: 'reg:squarederror'.
    """

    def __init__(self, **kwargs):
        """Initialize the regressor; see XGBModel for accepted parameters."""

    def fit(self, X, y, **kwargs):
        """Fit the regressor to training data. Returns self."""

    def predict(self, X, **kwargs):
        """Predict continuous target values for X."""


# XGBoost classifier for categorical target variables.
class XGBClassifier(XGBModel):
    """XGBoost classifier for categorical target variables.

    Inherits all constructor parameters from XGBModel.
    Default objective: 'binary:logistic' (binary) or 'multi:softprob'
    (multiclass).
    """

    def __init__(self, **kwargs):
        """Initialize the classifier; see XGBModel for accepted parameters."""

    def fit(self, X, y, **kwargs):
        """Fit the classifier to training data. Returns self."""

    def predict(self, X, **kwargs):
        """Predict class labels for X."""

    def predict_proba(
        self,
        X,
        validate_features=True,
        base_margin=None,
        iteration_range=None
    ):
        """
        Predict class probabilities.

        Parameters:
        - X: Input features
        - validate_features: Validate feature names/types
        - base_margin: Base margin for each sample
        - iteration_range: Range of boosting rounds used for prediction

        Returns:
        Class probabilities as numpy array
        """

    def predict_log_proba(self, X, **kwargs):
        """Predict log class probabilities for X."""

    @property
    def classes_(self):
        """Unique class labels seen during fit."""


# XGBoost ranker for learning-to-rank problems.
class XGBRanker(XGBModel):
    """XGBoost ranker for learning-to-rank problems.

    Inherits all constructor parameters from XGBModel.
    Default objective: 'rank:pairwise'.
    """

    def __init__(self, **kwargs):
        """Initialize the ranker; see XGBModel for accepted parameters."""

    def fit(
        self,
        X,
        y,
        group=None,
        qid=None,
        sample_weight=None,
        base_margin=None,
        eval_set=None,
        eval_group=None,
        eval_qid=None,
        eval_metric=None,
        early_stopping_rounds=None,
        verbose=True,
        xgb_model=None,
        sample_weight_eval_set=None,
        base_margin_eval_set=None,
        feature_weights=None,
        callbacks=None
    ):
        """
        Fit the ranker to training data.

        Query structure is given either as group sizes or per-sample query
        IDs (typically one of `group`/`qid` is supplied, not both).

        Parameters:
        - X: Training features
        - y: Training relevance scores
        - group: Sizes of consecutive query groups in X
        - qid: Query ID for each sample
        - eval_group / eval_qid: Query structure for each eval set
        - (remaining parameters match XGBModel.fit)

        Returns:
        Self
        """

    def predict(self, X, **kwargs):
        """Predict ranking scores for X."""


# XGBoost implementations of random forest algorithms.
class XGBRFRegressor(XGBModel):
    """Random-forest-style regressor built on XGBoost.

    Accepts every XGBModel parameter but is preconfigured with random
    forest defaults:
    - colsample_bynode=0.8
    - learning_rate=1.0
    - max_depth=None
    - n_estimators=100
    - num_parallel_tree=100
    - reg_lambda=1e-5
    - subsample=0.8
    """

    def __init__(self, **kwargs):
        """Create the estimator; keyword arguments override the defaults above."""

    def fit(self, X, y, **kwargs):
        """Train the random forest on (X, y)."""

    def predict(self, X, **kwargs):
        """Return forest predictions for X."""
class XGBRFClassifier(XGBModel):
    """XGBoost random forest classifier.

    Uses the same random forest defaults as XGBRFRegressor, combined with
    a classification objective.
    """

    def __init__(self, **kwargs):
        """Initialize the classifier; see XGBModel for accepted parameters."""

    def fit(self, X, y, **kwargs):
        """Fit the random forest classifier to (X, y)."""

    def predict(self, X, **kwargs):
        """Predict class labels using the random forest."""

    def predict_proba(self, X, **kwargs):
        """Predict class probabilities using the random forest."""


# Example: classification with XGBClassifier.
from xgboost import XGBClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load data
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train classifier
clf = XGBClassifier(n_estimators=100, max_depth=3, learning_rate=0.1)
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)
y_proba = clf.predict_proba(X_test)

# Feature importance
importance = clf.feature_importances_

# Example: regression with early stopping.
from xgboost import XGBRegressor
# NOTE: load_boston was removed in scikit-learn 1.2; use load_diabetes instead.
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

# Load data
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train with early stopping on a held-out evaluation set
reg = XGBRegressor(
    n_estimators=1000,
    max_depth=3,
    learning_rate=0.1,
    early_stopping_rounds=10,
)
reg.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    verbose=False,
)

# Predict
y_pred = reg.predict(X_test)

# Install with Tessl CLI
npx tessl i tessl/pypi-xgboost