LightGBM is a gradient boosting framework that uses tree-based learning algorithms, designed to be distributed and efficient with faster training speed, higher efficiency, lower memory usage, better accuracy, and support for parallel, distributed, and GPU learning.
## Scikit-learn API
High-level, sklearn-compatible interface for gradient boosting tasks. These classes provide familiar scikit-learn APIs with automatic hyperparameter handling, data preprocessing, and integration with the broader sklearn ecosystem.
The foundational class that provides common functionality for all LightGBM sklearn-style estimators.
class LGBMModel:
"""
Base class for LightGBM sklearn-style estimators.
Common parameters:
- boosting_type: str, default='gbdt' - Type of boosting ('gbdt', 'dart', 'goss', 'rf')
- num_leaves: int, default=31 - Maximum tree leaves for base learners
- max_depth: int, default=-1 - Maximum tree depth for base learners (-1 means no limit)
- learning_rate: float, default=0.1 - Boosting learning rate
- n_estimators: int, default=100 - Number of boosted trees to fit
- subsample_for_bin: int, default=200000 - Number of samples for constructing bins
- objective: str or callable, default=None - Specify the learning task and loss function
- class_weight: dict, 'balanced' or None, default=None - Weights associated with classes
- min_split_gain: float, default=0. - Minimum loss reduction required to make split
- min_child_weight: float, default=1e-3 - Minimum sum of instance weight in a child
- min_child_samples: int, default=20 - Minimum number of data needed in a child
- subsample: float, default=1. - Subsample ratio of the training instance
- subsample_freq: int, default=0 - Frequency of subsample, <=0 means no enable
- colsample_bytree: float, default=1. - Subsample ratio of columns when constructing each tree
- reg_alpha: float, default=0. - L1 regularization term on weights
- reg_lambda: float, default=0. - L2 regularization term on weights
- random_state: int, RandomState object or None, default=None - Random number seed
- n_jobs: int, default=None - Number of parallel threads
- importance_type: str, default='split' - Feature importance type ('split', 'gain')
"""
def fit(self, X, y, sample_weight=None, init_score=None, eval_set=None,
eval_names=None, eval_sample_weight=None, eval_init_score=None,
eval_metric=None, feature_name='auto', categorical_feature='auto',
early_stopping_rounds=None, verbose=True, log_evaluation=None,
callbacks=None):
"""
Fit the gradient boosting model.
Parameters:
- X: array-like, shape=(n_samples, n_features) - Input features
- y: array-like, shape=(n_samples,) - Target values
- sample_weight: array-like, shape=(n_samples,), optional - Sample weights
- init_score: array-like, shape=(n_samples,), optional - Initial prediction scores
- eval_set: list of (X, y) tuples, optional - Evaluation datasets
- eval_names: list of strings, optional - Names for evaluation datasets
- eval_sample_weight: list of arrays, optional - Sample weights for evaluation sets
- eval_init_score: list of arrays, optional - Initial scores for evaluation sets
- eval_metric: str, list of str, or None, optional - Evaluation metrics
- feature_name: list of strings or 'auto', optional - Feature names
- categorical_feature: list of strings/ints or 'auto', optional - Categorical features
- early_stopping_rounds: int or None, optional - Early stopping rounds
- verbose: bool or int, optional - Controls verbosity of training
- log_evaluation: bool, int, or None, optional - Evaluation logging frequency
- callbacks: list of callback functions, optional - Custom callbacks
Returns:
- self: Returns self
"""
def predict(self, X, num_iteration=None, **kwargs):
"""
Make predictions on input data.
Parameters:
- X: array-like, shape=(n_samples, n_features) - Input features
- num_iteration: int or None, optional - Limit number of iterations for prediction
Returns:
- array-like, shape=(n_samples,) - Predicted values
"""
@property
def booster_(self):
"""Get the underlying Booster object."""
@property
def feature_importances_(self):
"""Get feature importances array."""
@property
def feature_name_(self):
"""Get feature names list."""
@property
def n_features_(self):
"""Get number of features."""
@property
def objective_(self):
"""Get the concrete objective used by this model."""LightGBM regressor for continuous target variables. Optimized for regression tasks with support for various loss functions and evaluation metrics.
class LGBMRegressor(LGBMModel):
"""
LightGBM regressor for regression tasks.
Additional parameters:
- objective: str, default='regression' - Regression objective ('regression', 'regression_l1', 'huber', 'quantile', etc.)
"""
def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,
learning_rate=0.1, n_estimators=100, subsample_for_bin=200000,
objective=None, class_weight=None, min_split_gain=0.,
min_child_weight=1e-3, min_child_samples=20, subsample=1.,
subsample_freq=0, colsample_bytree=1., reg_alpha=0.,
reg_lambda=0., random_state=None, n_jobs=None,
importance_type='split', **kwargs):
"""Initialize LGBMRegressor with regression-specific defaults."""
def fit(self, X, y, **kwargs):
"""Fit regression model. Inherits from LGBMModel.fit()."""
def predict(self, X, num_iteration=None, **kwargs):
"""
Predict regression target for X.
Returns:
- array-like, shape=(n_samples,) - Predicted regression values
"""
def score(self, X, y, sample_weight=None):
"""
Return the coefficient of determination R^2 of the prediction.
Parameters:
- X: array-like, shape=(n_samples, n_features) - Test samples
- y: array-like, shape=(n_samples,) - True values for X
- sample_weight: array-like, shape=(n_samples,), optional - Sample weights
Returns:
- float: R^2 of self.predict(X) wrt. y
"""LightGBM classifier for discrete target variables. Supports both binary and multiclass classification with probability estimation and class prediction.
class LGBMClassifier(LGBMModel):
"""
LightGBM classifier for classification tasks.
Additional parameters:
- objective: str, default='binary' or 'multiclass' - Classification objective
"""
def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,
learning_rate=0.1, n_estimators=100, subsample_for_bin=200000,
objective=None, class_weight=None, min_split_gain=0.,
min_child_weight=1e-3, min_child_samples=20, subsample=1.,
subsample_freq=0, colsample_bytree=1., reg_alpha=0.,
reg_lambda=0., random_state=None, n_jobs=None,
importance_type='split', **kwargs):
"""Initialize LGBMClassifier with classification-specific defaults."""
def fit(self, X, y, **kwargs):
"""Fit classification model. Inherits from LGBMModel.fit()."""
def predict(self, X, num_iteration=None, **kwargs):
"""
Predict class labels for X.
Returns:
- array-like, shape=(n_samples,) - Predicted class labels
"""
def predict_proba(self, X, num_iteration=None, **kwargs):
"""
Predict class probabilities for X.
Returns:
- array-like, shape=(n_samples, n_classes) - Class probabilities
"""
def score(self, X, y, sample_weight=None):
"""
Return the mean accuracy on the given test data and labels.
Parameters:
- X: array-like, shape=(n_samples, n_features) - Test samples
- y: array-like, shape=(n_samples,) - True labels for X
- sample_weight: array-like, shape=(n_samples,), optional - Sample weights
Returns:
- float: Mean accuracy of self.predict(X) wrt. y
"""
@property
def classes_(self):
"""Get unique class labels."""
@property
def n_classes_(self):
"""Get number of classes."""LightGBM ranker for learning-to-rank tasks. Optimized for ranking scenarios where the goal is to order items rather than predict absolute values.
class LGBMRanker(LGBMModel):
    """
    LightGBM ranker for learning-to-rank tasks.

    Additional parameters:
    - objective: str, default='rank_xendcg' - Ranking objective ('lambdarank', 'rank_xendcg')
    """

    def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,
                 learning_rate=0.1, n_estimators=100, subsample_for_bin=200000,
                 objective=None, class_weight=None, min_split_gain=0.,
                 min_child_weight=1e-3, min_child_samples=20, subsample=1.,
                 subsample_freq=0, colsample_bytree=1., reg_alpha=0.,
                 reg_lambda=0., random_state=None, n_jobs=None,
                 importance_type='split', **kwargs):
        """Initialize LGBMRanker with ranking-specific defaults."""

    def fit(self, X, y, group=None, **kwargs):
        """
        Fit ranking model.

        Parameters:
        - X: array-like, shape=(n_samples, n_features) - Input features
        - y: array-like, shape=(n_samples,) - Target ranking scores
        - group: array-like, shape=(n_groups,) - Group/query sizes for ranking.
          NOTE(review): LightGBM requires sum(group) == n_samples — confirm
          against the LightGBM documentation.
        """

    def predict(self, X, num_iteration=None, **kwargs):
        """
        Predict ranking scores for X.

        Returns:
        - array-like, shape=(n_samples,) - Predicted ranking scores
        """

    def score(self, X, y, sample_weight=None):
        """
        Return the ranking evaluation score.

        Parameters:
        - X: array-like, shape=(n_samples, n_features) - Test samples
        - y: array-like, shape=(n_samples,) - True ranking scores for X
        - sample_weight: array-like, shape=(n_samples,), optional - Sample weights

        Returns:
        - float: Ranking evaluation score
        """

import lightgbm as lgb
# load_boston was removed in scikit-learn 1.2 (ethical concerns);
# use the California housing dataset instead.
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load data
X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train regressor
regressor = lgb.LGBMRegressor(
    objective='regression',
    n_estimators=100,
    learning_rate=0.1,
    num_leaves=31,
    random_state=42
)

# The early_stopping_rounds/verbose fit kwargs were removed in LightGBM 4.0;
# pass the equivalent callbacks instead (log_evaluation(0) silences output).
regressor.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    eval_metric='l2',
    callbacks=[lgb.early_stopping(10), lgb.log_evaluation(0)],
)

# Make predictions
predictions = regressor.predict(X_test)
print(f"R² Score: {r2_score(y_test, predictions):.4f}")
# squared=False was deprecated in scikit-learn 1.4; take the root explicitly.
print(f"RMSE: {mean_squared_error(y_test, predictions) ** 0.5:.4f}")
import lightgbm as lgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load data
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train classifier
classifier = lgb.LGBMClassifier(
    objective='multiclass',
    n_estimators=100,
    learning_rate=0.1,
    num_leaves=31,
    random_state=42
)

# The early_stopping_rounds/verbose fit kwargs were removed in LightGBM 4.0;
# pass the equivalent callbacks instead (log_evaluation(0) silences output).
classifier.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    eval_metric='multi_logloss',
    callbacks=[lgb.early_stopping(10), lgb.log_evaluation(0)],
)

# Make predictions
predictions = classifier.predict(X_test)
probabilities = classifier.predict_proba(X_test)
print(f"Accuracy: {accuracy_score(y_test, predictions):.4f}")
print(f"Classes: {classifier.classes_}")
print(f"Feature Importances: {classifier.feature_importances_}")
import lightgbm as lgb
import numpy as np
from sklearn.datasets import make_regression
# Create ranking data
X, y = make_regression(n_samples=1000, n_features=10, random_state=42)
# Create groups for ranking (query sizes)
group = np.random.randint(10, 50, size=20) # 20 queries with varying sizes
group = group[group.cumsum() <= 1000] # Ensure total doesn't exceed samples
# Initialize and train ranker
ranker = lgb.LGBMRanker(
objective='rank_xendcg',
n_estimators=100,
learning_rate=0.1,
num_leaves=31,
random_state=42
)
ranker.fit(X, y, group=group)
# Make predictions
ranking_scores = ranker.predict(X)
print(f"Ranking scores shape: {ranking_scores.shape}")
print(f"Sample ranking scores: {ranking_scores[:10]}")Install with Tessl CLI
npx tessl i tessl/pypi-lightgbm