CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-lightgbm

LightGBM is a gradient boosting framework that uses tree-based learning algorithms, designed to be distributed and efficient with faster training speed, higher efficiency, lower memory usage, better accuracy, and support for parallel, distributed, and GPU learning.

Pending
Overview
Eval results
Files

docs/sklearn-interface.md

Scikit-learn Interface

High-level, sklearn-compatible interface for gradient boosting tasks. These classes provide familiar scikit-learn APIs with automatic hyperparameter handling, data preprocessing, and integration with the broader sklearn ecosystem.

Capabilities

Base Model Interface

The foundational class that provides common functionality for all LightGBM sklearn-style estimators.

class LGBMModel:
    """Base class shared by every LightGBM sklearn-style estimator.

    Construction parameters common to all subclasses:
        boosting_type (str): Boosting algorithm — 'gbdt', 'dart', 'goss',
            or 'rf'. Default 'gbdt'.
        num_leaves (int): Maximum number of leaves per base learner.
            Default 31.
        max_depth (int): Maximum tree depth; -1 disables the limit.
            Default -1.
        learning_rate (float): Shrinkage applied each boosting round.
            Default 0.1.
        n_estimators (int): Number of boosted trees to fit. Default 100.
        subsample_for_bin (int): Number of samples used to construct
            feature bins. Default 200000.
        objective (str or callable): Learning task and loss function.
            Default None.
        class_weight (dict, 'balanced', or None): Weights associated with
            classes. Default None.
        min_split_gain (float): Minimum loss reduction required to make a
            split. Default 0.
        min_child_weight (float): Minimum sum of instance weight needed in
            a child. Default 1e-3.
        min_child_samples (int): Minimum number of data needed in a child.
            Default 20.
        subsample (float): Row subsample ratio of the training instances.
            Default 1.
        subsample_freq (int): How often (in iterations) rows are
            resampled; values <= 0 disable row subsampling. Default 0.
        colsample_bytree (float): Column subsample ratio used when
            constructing each tree. Default 1.
        reg_alpha (float): L1 regularization term on weights. Default 0.
        reg_lambda (float): L2 regularization term on weights. Default 0.
        random_state (int, RandomState, or None): Random number seed.
            Default None.
        n_jobs (int or None): Number of parallel threads. Default None.
        importance_type (str): Feature importance flavour — 'split' or
            'gain'. Default 'split'.
    """

    def fit(self, X, y, sample_weight=None, init_score=None, eval_set=None,
            eval_names=None, eval_sample_weight=None, eval_init_score=None,
            eval_metric=None, feature_name='auto', categorical_feature='auto',
            early_stopping_rounds=None, verbose=True, log_evaluation=None,
            callbacks=None):
        """Fit the gradient boosting model.

        Args:
            X: array-like of shape (n_samples, n_features). Input features.
            y: array-like of shape (n_samples,). Target values.
            sample_weight: optional array-like of shape (n_samples,).
                Per-sample weights.
            init_score: optional array-like of shape (n_samples,).
                Initial prediction scores.
            eval_set: optional list of (X, y) tuples. Evaluation datasets.
            eval_names: optional list of str. Names for the eval datasets.
            eval_sample_weight: optional list of arrays. Sample weights for
                the evaluation sets.
            eval_init_score: optional list of arrays. Initial scores for
                the evaluation sets.
            eval_metric: optional str, list of str, or None. Evaluation
                metrics.
            feature_name: list of str or 'auto'. Feature names.
            categorical_feature: list of str/int or 'auto'. Categorical
                feature specification.
            early_stopping_rounds: optional int. Early stopping rounds.
            verbose: bool or int. Controls training verbosity.
            log_evaluation: optional bool or int. Evaluation logging
                frequency.
            callbacks: optional list of callables. Custom callbacks.

        Returns:
            self
        """

    def predict(self, X, num_iteration=None, **kwargs):
        """Produce predictions for ``X``.

        Args:
            X: array-like of shape (n_samples, n_features). Input features.
            num_iteration: optional int. Caps the number of boosting
                iterations used for prediction.

        Returns:
            array-like of shape (n_samples,): predicted values.
        """

    @property
    def booster_(self):
        """The underlying Booster object."""

    @property
    def feature_importances_(self):
        """Array of feature importances."""

    @property
    def feature_name_(self):
        """List of feature names."""

    @property
    def n_features_(self):
        """Number of features."""

    @property
    def objective_(self):
        """The concrete objective used by this model."""

Regression

LightGBM regressor for continuous target variables. Optimized for regression tasks with support for various loss functions and evaluation metrics.

class LGBMRegressor(LGBMModel):
    """Sklearn-style LightGBM estimator for regression problems.

    Additional parameters:
        objective (str): Regression loss, default 'regression'. Other
            options include 'regression_l1', 'huber', 'quantile', etc.
    """

    def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,
                 learning_rate=0.1, n_estimators=100, subsample_for_bin=200000,
                 objective=None, class_weight=None, min_split_gain=0.,
                 min_child_weight=1e-3, min_child_samples=20, subsample=1.,
                 subsample_freq=0, colsample_bytree=1., reg_alpha=0.,
                 reg_lambda=0., random_state=None, n_jobs=None,
                 importance_type='split', **kwargs):
        """Build an LGBMRegressor with regression-specific defaults."""

    def fit(self, X, y, **kwargs):
        """Fit the regression model; parameters as in LGBMModel.fit()."""

    def predict(self, X, num_iteration=None, **kwargs):
        """Predict continuous targets for ``X``.

        Returns:
            array-like of shape (n_samples,): predicted regression values.
        """

    def score(self, X, y, sample_weight=None):
        """Coefficient of determination R^2 of the prediction.

        Args:
            X: array-like of shape (n_samples, n_features). Test samples.
            y: array-like of shape (n_samples,). True values for X.
            sample_weight: optional array-like of shape (n_samples,).

        Returns:
            float: R^2 of self.predict(X) with respect to y.
        """

Classification

LightGBM classifier for discrete target variables. Supports both binary and multiclass classification with probability estimation and class prediction.

class LGBMClassifier(LGBMModel):
    """Sklearn-style LightGBM estimator for classification problems.

    Additional parameters:
        objective (str): Classification objective, default 'binary' or
            'multiclass' depending on the number of classes.
    """

    def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,
                 learning_rate=0.1, n_estimators=100, subsample_for_bin=200000,
                 objective=None, class_weight=None, min_split_gain=0.,
                 min_child_weight=1e-3, min_child_samples=20, subsample=1.,
                 subsample_freq=0, colsample_bytree=1., reg_alpha=0.,
                 reg_lambda=0., random_state=None, n_jobs=None,
                 importance_type='split', **kwargs):
        """Build an LGBMClassifier with classification-specific defaults."""

    def fit(self, X, y, **kwargs):
        """Fit the classification model; parameters as in LGBMModel.fit()."""

    def predict(self, X, num_iteration=None, **kwargs):
        """Predict class labels for ``X``.

        Returns:
            array-like of shape (n_samples,): predicted class labels.
        """

    def predict_proba(self, X, num_iteration=None, **kwargs):
        """Predict class probabilities for ``X``.

        Returns:
            array-like of shape (n_samples, n_classes): per-class
            probabilities.
        """

    def score(self, X, y, sample_weight=None):
        """Mean accuracy on the given test data and labels.

        Args:
            X: array-like of shape (n_samples, n_features). Test samples.
            y: array-like of shape (n_samples,). True labels for X.
            sample_weight: optional array-like of shape (n_samples,).

        Returns:
            float: mean accuracy of self.predict(X) with respect to y.
        """

    @property
    def classes_(self):
        """Unique class labels."""

    @property
    def n_classes_(self):
        """Number of classes."""

Ranking

LightGBM ranker for learning-to-rank tasks. Optimized for ranking scenarios where the goal is to order items rather than predict absolute values.

class LGBMRanker(LGBMModel):
    """
    LightGBM ranker for learning-to-rank tasks.

    Additional parameters:
    - objective: str, default='lambdarank' - Ranking objective
      ('lambdarank', 'rank_xendcg'). NOTE: the previous docs stated
      'rank_xendcg' as the default, but LightGBM's LGBMRanker defaults
      to 'lambdarank' when objective is None.
    """

    def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,
                 learning_rate=0.1, n_estimators=100, subsample_for_bin=200000,
                 objective=None, class_weight=None, min_split_gain=0.,
                 min_child_weight=1e-3, min_child_samples=20, subsample=1.,
                 subsample_freq=0, colsample_bytree=1., reg_alpha=0.,
                 reg_lambda=0., random_state=None, n_jobs=None,
                 importance_type='split', **kwargs):
        """Initialize LGBMRanker with ranking-specific defaults."""

    def fit(self, X, y, group=None, **kwargs):
        """
        Fit ranking model.

        Parameters:
        - X: array-like, shape=(n_samples, n_features) - Input features
        - y: array-like, shape=(n_samples,) - Target ranking scores
        - group: array-like, shape=(n_groups,) - Group/query sizes for
          ranking. The group sizes must sum to n_samples, i.e.
          sum(group) == len(X); each entry is the number of consecutive
          rows of X belonging to one query.
        """

    def predict(self, X, num_iteration=None, **kwargs):
        """
        Predict ranking scores for X.

        Returns:
        - array-like, shape=(n_samples,) - Predicted ranking scores
        """

    def score(self, X, y, sample_weight=None):
        """
        Return the ranking evaluation score.

        Parameters:
        - X: array-like, shape=(n_samples, n_features) - Test samples
        - y: array-like, shape=(n_samples,) - True ranking scores for X
        - sample_weight: array-like, shape=(n_samples,), optional - Sample weights

        Returns:
        - float: Ranking evaluation score
        """

Usage Examples

Regression Example

import lightgbm as lgb
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load data.
# NOTE: load_boston was removed from scikit-learn 1.2; load_diabetes is a
# bundled dataset that requires no download.
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train regressor
regressor = lgb.LGBMRegressor(
    objective='regression',
    n_estimators=100,
    learning_rate=0.1,
    num_leaves=31,
    random_state=42
)

# LightGBM >= 4.0 removed the early_stopping_rounds/verbose fit kwargs;
# the callback API below works on both 3.x and 4.x.
regressor.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    eval_metric='l2',
    callbacks=[lgb.early_stopping(10), lgb.log_evaluation(0)]
)

# Make predictions
predictions = regressor.predict(X_test)
print(f"R² Score: {r2_score(y_test, predictions):.4f}")
# mean_squared_error(squared=False) was removed in scikit-learn 1.6;
# take the square root explicitly instead.
print(f"RMSE: {mean_squared_error(y_test, predictions) ** 0.5:.4f}")

Classification Example

import lightgbm as lgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load data
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train classifier
classifier = lgb.LGBMClassifier(
    objective='multiclass',
    n_estimators=100,
    learning_rate=0.1,
    num_leaves=31,
    random_state=42
)

# LightGBM >= 4.0 removed the early_stopping_rounds/verbose fit kwargs;
# the callback API below works on both 3.x and 4.x.
classifier.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    eval_metric='multi_logloss',
    callbacks=[lgb.early_stopping(10), lgb.log_evaluation(0)]
)

# Make predictions
predictions = classifier.predict(X_test)
probabilities = classifier.predict_proba(X_test)

print(f"Accuracy: {accuracy_score(y_test, predictions):.4f}")
print(f"Classes: {classifier.classes_}")
print(f"Feature Importances: {classifier.feature_importances_}")

Ranking Example

import lightgbm as lgb
import numpy as np
from sklearn.datasets import make_regression

# Create ranking data
X, y = make_regression(n_samples=1000, n_features=10, random_state=42)

# Create query groups whose sizes sum to EXACTLY n_samples.
# LightGBM requires sum(group) == len(X); the previous example filtered
# by cumulative sum, which left the total short of 1000 and made fit() fail.
rng = np.random.default_rng(42)
group = rng.integers(10, 50, size=19)            # 19 queries of 10-49 rows
group = np.append(group, 1000 - group.sum())      # last query absorbs the remainder
assert group.sum() == 1000 and (group > 0).all()

# Initialize and train ranker
ranker = lgb.LGBMRanker(
    objective='rank_xendcg',
    n_estimators=100,
    learning_rate=0.1,
    num_leaves=31,
    random_state=42
)

ranker.fit(X, y, group=group)

# Make predictions
ranking_scores = ranker.predict(X)
print(f"Ranking scores shape: {ranking_scores.shape}")
print(f"Sample ranking scores: {ranking_scores[:10]}")

Install with Tessl CLI

npx tessl i tessl/pypi-lightgbm

docs

core-training.md

distributed-computing.md

index.md

sklearn-interface.md

training-callbacks.md

visualization.md

tile.json