LightGBM is a gradient boosting framework that uses tree-based learning algorithms, designed to be distributed and efficient with faster training speed, higher efficiency, lower memory usage, better accuracy, and support for parallel, distributed, and GPU learning.
## Scikit-learn API
High-level, sklearn-compatible interface for gradient boosting tasks. These classes provide familiar scikit-learn APIs with automatic hyperparameter handling, data preprocessing, and integration with the broader sklearn ecosystem.
The foundational class that provides common functionality for all LightGBM sklearn-style estimators.
class LGBMModel:
"""
Base class for LightGBM sklearn-style estimators.
Common parameters:
- boosting_type: str, default='gbdt' - Type of boosting ('gbdt', 'dart', 'goss', 'rf')
- num_leaves: int, default=31 - Maximum tree leaves for base learners
- max_depth: int, default=-1 - Maximum tree depth for base learners (-1 means no limit)
- learning_rate: float, default=0.1 - Boosting learning rate
- n_estimators: int, default=100 - Number of boosted trees to fit
- subsample_for_bin: int, default=200000 - Number of samples for constructing bins
- objective: str or callable, default=None - Specify the learning task and loss function
- class_weight: dict, 'balanced' or None, default=None - Weights associated with classes
- min_split_gain: float, default=0. - Minimum loss reduction required to make split
- min_child_weight: float, default=1e-3 - Minimum sum of instance weight in a child
- min_child_samples: int, default=20 - Minimum number of data needed in a child
- subsample: float, default=1. - Subsample ratio of the training instance
- subsample_freq: int, default=0 - Frequency of subsample, <=0 means no enable
- colsample_bytree: float, default=1. - Subsample ratio of columns when constructing each tree
- reg_alpha: float, default=0. - L1 regularization term on weights
- reg_lambda: float, default=0. - L2 regularization term on weights
- random_state: int, RandomState object or None, default=None - Random number seed
- n_jobs: int, default=None - Number of parallel threads
- importance_type: str, default='split' - Feature importance type ('split', 'gain')
"""
def fit(self, X, y, sample_weight=None, init_score=None, eval_set=None,
eval_names=None, eval_sample_weight=None, eval_init_score=None,
eval_metric=None, feature_name='auto', categorical_feature='auto',
early_stopping_rounds=None, verbose=True, log_evaluation=None,
callbacks=None):
"""
Fit the gradient boosting model.
Parameters:
- X: array-like, shape=(n_samples, n_features) - Input features
- y: array-like, shape=(n_samples,) - Target values
- sample_weight: array-like, shape=(n_samples,), optional - Sample weights
- init_score: array-like, shape=(n_samples,), optional - Initial prediction scores
- eval_set: list of (X, y) tuples, optional - Evaluation datasets
- eval_names: list of strings, optional - Names for evaluation datasets
- eval_sample_weight: list of arrays, optional - Sample weights for evaluation sets
- eval_init_score: list of arrays, optional - Initial scores for evaluation sets
- eval_metric: str, list of str, or None, optional - Evaluation metrics
- feature_name: list of strings or 'auto', optional - Feature names
- categorical_feature: list of strings/ints or 'auto', optional - Categorical features
- early_stopping_rounds: int or None, optional - Early stopping rounds
- verbose: bool or int, optional - Controls verbosity of training
- log_evaluation: bool, int, or None, optional - Evaluation logging frequency
- callbacks: list of callback functions, optional - Custom callbacks
Returns:
- self: Returns self
"""
def predict(self, X, num_iteration=None, **kwargs):
"""
Make predictions on input data.
Parameters:
- X: array-like, shape=(n_samples, n_features) - Input features
- num_iteration: int or None, optional - Limit number of iterations for prediction
Returns:
- array-like, shape=(n_samples,) - Predicted values
"""
@property
def booster_(self):
"""Get the underlying Booster object."""
@property
def feature_importances_(self):
"""Get feature importances array."""
@property
def feature_name_(self):
"""Get feature names list."""
@property
def n_features_(self):
"""Get number of features."""
@property
def objective_(self):
"""Get the concrete objective used by this model."""LightGBM regressor for continuous target variables. Optimized for regression tasks with support for various loss functions and evaluation metrics.
class LGBMRegressor(LGBMModel):
"""
LightGBM regressor for regression tasks.
Additional parameters:
- objective: str, default='regression' - Regression objective ('regression', 'regression_l1', 'huber', 'quantile', etc.)
"""
def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,
learning_rate=0.1, n_estimators=100, subsample_for_bin=200000,
objective=None, class_weight=None, min_split_gain=0.,
min_child_weight=1e-3, min_child_samples=20, subsample=1.,
subsample_freq=0, colsample_bytree=1., reg_alpha=0.,
reg_lambda=0., random_state=None, n_jobs=None,
importance_type='split', **kwargs):
"""Initialize LGBMRegressor with regression-specific defaults."""
def fit(self, X, y, **kwargs):
"""Fit regression model. Inherits from LGBMModel.fit()."""
def predict(self, X, num_iteration=None, **kwargs):
"""
Predict regression target for X.
Returns:
- array-like, shape=(n_samples,) - Predicted regression values
"""
def score(self, X, y, sample_weight=None):
"""
Return the coefficient of determination R^2 of the prediction.
Parameters:
- X: array-like, shape=(n_samples, n_features) - Test samples
- y: array-like, shape=(n_samples,) - True values for X
- sample_weight: array-like, shape=(n_samples,), optional - Sample weights
Returns:
- float: R^2 of self.predict(X) wrt. y
"""LightGBM classifier for discrete target variables. Supports both binary and multiclass classification with probability estimation and class prediction.
class LGBMClassifier(LGBMModel):
"""
LightGBM classifier for classification tasks.
Additional parameters:
- objective: str, default='binary' or 'multiclass' - Classification objective
"""
def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,
learning_rate=0.1, n_estimators=100, subsample_for_bin=200000,
objective=None, class_weight=None, min_split_gain=0.,
min_child_weight=1e-3, min_child_samples=20, subsample=1.,
subsample_freq=0, colsample_bytree=1., reg_alpha=0.,
reg_lambda=0., random_state=None, n_jobs=None,
importance_type='split', **kwargs):
"""Initialize LGBMClassifier with classification-specific defaults."""
def fit(self, X, y, **kwargs):
"""Fit classification model. Inherits from LGBMModel.fit()."""
def predict(self, X, num_iteration=None, **kwargs):
"""
Predict class labels for X.
Returns:
- array-like, shape=(n_samples,) - Predicted class labels
"""
def predict_proba(self, X, num_iteration=None, **kwargs):
"""
Predict class probabilities for X.
Returns:
- array-like, shape=(n_samples, n_classes) - Class probabilities
"""
def score(self, X, y, sample_weight=None):
"""
Return the mean accuracy on the given test data and labels.
Parameters:
- X: array-like, shape=(n_samples, n_features) - Test samples
- y: array-like, shape=(n_samples,) - True labels for X
- sample_weight: array-like, shape=(n_samples,), optional - Sample weights
Returns:
- float: Mean accuracy of self.predict(X) wrt. y
"""
@property
def classes_(self):
"""Get unique class labels."""
@property
def n_classes_(self):
"""Get number of classes."""LightGBM ranker for learning-to-rank tasks. Optimized for ranking scenarios where the goal is to order items rather than predict absolute values.
class LGBMRanker(LGBMModel):
    """
    LightGBM ranker for learning-to-rank tasks.

    Additional parameters:
    - objective: str, default='rank_xendcg' - Ranking objective ('lambdarank', 'rank_xendcg')
    """

    def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,
                 learning_rate=0.1, n_estimators=100, subsample_for_bin=200000,
                 objective=None, class_weight=None, min_split_gain=0.,
                 min_child_weight=1e-3, min_child_samples=20, subsample=1.,
                 subsample_freq=0, colsample_bytree=1., reg_alpha=0.,
                 reg_lambda=0., random_state=None, n_jobs=None,
                 importance_type='split', **kwargs):
        """Initialize LGBMRanker with ranking-specific defaults."""

    def fit(self, X, y, group=None, **kwargs):
        """
        Fit ranking model.

        Parameters:
        - X: array-like, shape=(n_samples, n_features) - Input features
        - y: array-like, shape=(n_samples,) - Target ranking scores
        - group: array-like, shape=(n_groups,) - Group/query sizes for ranking.
          NOTE(review): LightGBM requires sum(group) == n_samples — confirm
          against the LightGBM documentation.
        """

    def predict(self, X, num_iteration=None, **kwargs):
        """
        Predict ranking scores for X.

        Returns:
        - array-like, shape=(n_samples,) - Predicted ranking scores
        """

    def score(self, X, y, sample_weight=None):
        """
        Return the ranking evaluation score.

        Parameters:
        - X: array-like, shape=(n_samples, n_features) - Test samples
        - y: array-like, shape=(n_samples,) - True ranking scores for X
        - sample_weight: array-like, shape=(n_samples,), optional - Sample weights

        Returns:
        - float: Ranking evaluation score
        """

import lightgbm as lgb
# load_boston was removed in scikit-learn 1.2 (ethical concerns);
# use the California housing dataset instead.
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load data
X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train regressor
regressor = lgb.LGBMRegressor(
    objective='regression',
    n_estimators=100,
    learning_rate=0.1,
    num_leaves=31,
    random_state=42
)

# The early_stopping_rounds/verbose fit kwargs were removed in LightGBM 4.0;
# pass the equivalent callbacks instead (log_evaluation(0) silences output).
regressor.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    eval_metric='l2',
    callbacks=[lgb.early_stopping(10), lgb.log_evaluation(0)],
)

# Make predictions
predictions = regressor.predict(X_test)
print(f"R² Score: {r2_score(y_test, predictions):.4f}")
# squared=False was deprecated in scikit-learn 1.4; take the root explicitly.
print(f"RMSE: {mean_squared_error(y_test, predictions) ** 0.5:.4f}")
import lightgbm as lgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load data
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train classifier
classifier = lgb.LGBMClassifier(
    objective='multiclass',
    n_estimators=100,
    learning_rate=0.1,
    num_leaves=31,
    random_state=42
)

# The early_stopping_rounds/verbose fit kwargs were removed in LightGBM 4.0;
# pass the equivalent callbacks instead (log_evaluation(0) silences output).
classifier.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    eval_metric='multi_logloss',
    callbacks=[lgb.early_stopping(10), lgb.log_evaluation(0)],
)

# Make predictions
predictions = classifier.predict(X_test)
probabilities = classifier.predict_proba(X_test)
print(f"Accuracy: {accuracy_score(y_test, predictions):.4f}")
print(f"Classes: {classifier.classes_}")
print(f"Feature Importances: {classifier.feature_importances_}")
import lightgbm as lgb
import numpy as np
from sklearn.datasets import make_regression
# Create ranking data
X, y = make_regression(n_samples=1000, n_features=10, random_state=42)
# Create groups for ranking (query sizes)
group = np.random.randint(10, 50, size=20) # 20 queries with varying sizes
group = group[group.cumsum() <= 1000] # Ensure total doesn't exceed samples
# Initialize and train ranker
ranker = lgb.LGBMRanker(
objective='rank_xendcg',
n_estimators=100,
learning_rate=0.1,
num_leaves=31,
random_state=42
)
ranker.fit(X, y, group=group)
# Make predictions
ranking_scores = ranker.predict(X)
print(f"Ranking scores shape: {ranking_scores.shape}")
print(f"Sample ranking scores: {ranking_scores[:10]}")Install with Tessl CLI
npx tessl i tessl/pypi-lightgbm