XGBoost is an optimized distributed gradient boosting library designed to be highly efficient, flexible, and portable.
XGBoost provides scikit-learn compatible estimators that follow sklearn conventions for seamless integration with existing ML pipelines. These estimators provide familiar fit/predict interfaces while leveraging XGBoost's powerful gradient boosting algorithms.
Base class for all XGBoost sklearn-compatible estimators.
class XGBModel:
    """Base class shared by all XGBoost scikit-learn compatible estimators.

    NOTE(review): this is an API stub — methods carry only docstrings and
    document the public contract of the real implementation; they have no
    executable bodies.
    """

    def __init__(
        self,
        n_estimators=100,
        max_depth=None,
        max_leaves=None,
        max_bin=None,
        grow_policy=None,
        learning_rate=None,
        verbosity=None,
        objective=None,
        booster=None,
        tree_method=None,
        n_jobs=None,
        gamma=None,
        min_child_weight=None,
        max_delta_step=None,
        subsample=None,
        sampling_method=None,
        colsample_bytree=None,
        colsample_bylevel=None,
        colsample_bynode=None,
        reg_alpha=None,
        reg_lambda=None,
        scale_pos_weight=None,
        base_score=None,
        random_state=None,
        missing=None,
        num_parallel_tree=None,
        monotone_constraints=None,
        interaction_constraints=None,
        importance_type='gain',
        device=None,
        validate_parameters=None,
        enable_categorical=False,
        feature_types=None,
        max_cat_to_onehot=None,
        max_cat_threshold=None,
        multi_strategy=None,
        eval_metric=None,
        early_stopping_rounds=None,
        callbacks=None,
        **kwargs
    ):
        """
        Base XGBoost sklearn-compatible estimator.

        Parameters:
        - n_estimators: Number of boosting rounds
        - max_depth: Maximum tree depth
        - max_leaves: Maximum number of leaves per tree
        - max_bin: Maximum number of histogram bins
        - grow_policy: Tree growing policy ('depthwise' or 'lossguide')
        - learning_rate: Boosting learning rate (eta)
        - verbosity: Verbosity of printed messages
        - objective: Learning objective
        - booster: Booster type ('gbtree', 'gblinear', 'dart')
        - tree_method: Tree construction algorithm
        - n_jobs: Number of parallel threads
        - gamma: Minimum loss reduction for split
        - min_child_weight: Minimum sum of instance weight in child
        - max_delta_step: Maximum delta step allowed for each leaf output
        - subsample: Subsample ratio of training instances
        - sampling_method: Sampling method for training instances
        - colsample_bytree / colsample_bylevel / colsample_bynode:
          Column subsample ratio per tree / per level / per node
        - reg_alpha: L1 regularization term
        - reg_lambda: L2 regularization term
        - scale_pos_weight: Balancing weight for the positive class
        - base_score: Initial prediction score for all instances
        - random_state: Random seed
        - missing: Value in the input data to be treated as missing
        - num_parallel_tree: Number of trees grown per boosting round
        - monotone_constraints: Monotonicity constraints per feature
        - interaction_constraints: Allowed feature interaction groups
        - importance_type: Metric used by feature_importances_
        - device: Device to run on (e.g. 'cpu', 'cuda')
        - validate_parameters: Validate parameter names/values
        - enable_categorical: Enable categorical feature support
        - feature_types: Per-feature type hints
        - max_cat_to_onehot / max_cat_threshold: Categorical split controls
        - multi_strategy: Strategy for multi-target models
        - eval_metric: Evaluation metric(s)
        - early_stopping_rounds: Early stopping rounds
        - callbacks: Callback functions
        - kwargs: Additional parameters passed through to the booster
        """

    def fit(
        self,
        X,
        y,
        sample_weight=None,
        base_margin=None,
        eval_set=None,
        eval_metric=None,
        early_stopping_rounds=None,
        verbose=True,
        xgb_model=None,
        sample_weight_eval_set=None,
        base_margin_eval_set=None,
        feature_weights=None,
        callbacks=None
    ):
        """
        Fit the model to training data.

        Parameters:
        - X: Training features
        - y: Training labels
        - sample_weight: Sample weights
        - base_margin: Base margin for each sample
        - eval_set: Evaluation sets as list of (X, y) tuples
        - eval_metric: Evaluation metric(s)
        - early_stopping_rounds: Early stopping rounds
        - verbose: Verbosity of evaluation output
        - xgb_model: Existing model to continue training from
        - sample_weight_eval_set: Sample weights for eval sets
        - base_margin_eval_set: Base margins for eval sets
        - feature_weights: Feature weights for column sampling
        - callbacks: Callback functions

        Returns:
        Self
        """

    def predict(
        self,
        X,
        output_margin=False,
        validate_features=True,
        base_margin=None,
        iteration_range=None
    ):
        """
        Make predictions on input data.

        Parameters:
        - X: Input features
        - output_margin: Output raw (untransformed) margins
        - validate_features: Validate feature names/types
        - base_margin: Base margin for each sample
        - iteration_range: Range of boosting rounds used for prediction

        Returns:
        Predictions as numpy array
        """

    def get_booster(self):
        """Get the underlying Booster object."""

    def save_model(self, fname):
        """Save the model to file."""

    def load_model(self, fname):
        """Load a model from file."""

    @property
    def feature_importances_(self):
        """Feature importances as a numpy array (metric set by importance_type)."""

    def get_params(self, deep=True):
        """Get estimator parameters (sklearn convention)."""

    def set_params(self, **params):
        """Set estimator parameters (sklearn convention)."""


# XGBoost regressor for continuous target variables.
class XGBRegressor(XGBModel):
    """XGBoost regressor for continuous target variables.

    Inherits all constructor parameters from XGBModel.
    Default objective: 'reg:squarederror'.
    """

    def __init__(self, **kwargs):
        """Initialize the regressor; see XGBModel for accepted parameters."""

    def fit(self, X, y, **kwargs):
        """Fit the regressor to training data. Returns self."""

    def predict(self, X, **kwargs):
        """Predict continuous target values for X."""


# XGBoost classifier for categorical target variables.
class XGBClassifier(XGBModel):
    """XGBoost classifier for categorical target variables.

    Inherits all constructor parameters from XGBModel.
    Default objective: 'binary:logistic' (binary) or 'multi:softprob'
    (multiclass).
    """

    def __init__(self, **kwargs):
        """Initialize the classifier; see XGBModel for accepted parameters."""

    def fit(self, X, y, **kwargs):
        """Fit the classifier to training data. Returns self."""

    def predict(self, X, **kwargs):
        """Predict class labels for X."""

    def predict_proba(
        self,
        X,
        validate_features=True,
        base_margin=None,
        iteration_range=None
    ):
        """
        Predict class probabilities.

        Parameters:
        - X: Input features
        - validate_features: Validate feature names/types
        - base_margin: Base margin for each sample
        - iteration_range: Range of boosting rounds used for prediction

        Returns:
        Class probabilities as numpy array
        """

    def predict_log_proba(self, X, **kwargs):
        """Predict log class probabilities for X."""

    @property
    def classes_(self):
        """Unique class labels seen during fit."""


# XGBoost ranker for learning-to-rank problems.
class XGBRanker(XGBModel):
    """XGBoost ranker for learning-to-rank problems.

    Inherits all constructor parameters from XGBModel.
    Default objective: 'rank:pairwise'.
    """

    def __init__(self, **kwargs):
        """Initialize the ranker; see XGBModel for accepted parameters."""

    def fit(
        self,
        X,
        y,
        group=None,
        qid=None,
        sample_weight=None,
        base_margin=None,
        eval_set=None,
        eval_group=None,
        eval_qid=None,
        eval_metric=None,
        early_stopping_rounds=None,
        verbose=True,
        xgb_model=None,
        sample_weight_eval_set=None,
        base_margin_eval_set=None,
        feature_weights=None,
        callbacks=None
    ):
        """
        Fit the ranker to training data.

        Query structure is given either as group sizes or per-sample query
        IDs (typically one of `group`/`qid` is supplied, not both).

        Parameters:
        - X: Training features
        - y: Training relevance scores
        - group: Sizes of consecutive query groups in X
        - qid: Query ID for each sample
        - eval_group / eval_qid: Query structure for each eval set
        - (remaining parameters match XGBModel.fit)

        Returns:
        Self
        """

    def predict(self, X, **kwargs):
        """Predict ranking scores for X."""


# XGBoost implementations of random forest algorithms.
class XGBRFRegressor(XGBModel):
    """Random-forest-style regressor built on XGBoost.

    Accepts every XGBModel parameter but is preconfigured with random
    forest defaults:
    - colsample_bynode=0.8
    - learning_rate=1.0
    - max_depth=None
    - n_estimators=100
    - num_parallel_tree=100
    - reg_lambda=1e-5
    - subsample=0.8
    """

    def __init__(self, **kwargs):
        """Create the estimator; keyword arguments override the defaults above."""

    def fit(self, X, y, **kwargs):
        """Train the random forest on (X, y)."""

    def predict(self, X, **kwargs):
        """Return forest predictions for X."""
class XGBRFClassifier(XGBModel):
    """XGBoost random forest classifier.

    Uses the same random forest defaults as XGBRFRegressor, combined with
    a classification objective.
    """

    def __init__(self, **kwargs):
        """Initialize the classifier; see XGBModel for accepted parameters."""

    def fit(self, X, y, **kwargs):
        """Fit the random forest classifier to (X, y)."""

    def predict(self, X, **kwargs):
        """Predict class labels using the random forest."""

    def predict_proba(self, X, **kwargs):
        """Predict class probabilities using the random forest."""


# Example: classification with XGBClassifier.
from xgboost import XGBClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load data
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train classifier
clf = XGBClassifier(n_estimators=100, max_depth=3, learning_rate=0.1)
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)
y_proba = clf.predict_proba(X_test)

# Feature importance
importance = clf.feature_importances_

# Example: regression with early stopping.
from xgboost import XGBRegressor
# NOTE: load_boston was removed in scikit-learn 1.2; use load_diabetes instead.
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

# Load data
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train with early stopping on a held-out evaluation set
reg = XGBRegressor(
    n_estimators=1000,
    max_depth=3,
    learning_rate=0.1,
    early_stopping_rounds=10,
)
reg.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    verbose=False,
)

# Predict
y_pred = reg.predict(X_test)

# Install with Tessl CLI
npx tessl i tessl/pypi-xgboost