Tessl Tile for pypi/lightgbm@4.6.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

core-training.md distributed-computing.md index.md sklearn-interface.md training-callbacks.md visualization.md

sklearn-interface.mddocs/

0
# Scikit-learn Interface
1

2
High-level, sklearn-compatible interface for gradient boosting tasks. These classes provide familiar scikit-learn APIs with automatic hyperparameter handling, data preprocessing, and integration with the broader sklearn ecosystem.
3

4
## Capabilities
5

6
### Base Model Interface
7

8
The foundational class that provides common functionality for all LightGBM sklearn-style estimators.
9

10
```python { .api }
11
class LGBMModel:
12
    """
13
    Base class for LightGBM sklearn-style estimators.
14
    
15
    Common parameters:
16
    - boosting_type: str, default='gbdt' - Type of boosting ('gbdt', 'dart', 'goss', 'rf')
17
    - num_leaves: int, default=31 - Maximum tree leaves for base learners
18
    - max_depth: int, default=-1 - Maximum tree depth for base learners (-1 means no limit)
19
    - learning_rate: float, default=0.1 - Boosting learning rate
20
    - n_estimators: int, default=100 - Number of boosted trees to fit
21
    - subsample_for_bin: int, default=200000 - Number of samples for constructing bins
22
    - objective: str or callable, default=None - Specify the learning task and loss function
23
    - class_weight: dict, 'balanced' or None, default=None - Weights associated with classes
24
    - min_split_gain: float, default=0. - Minimum loss reduction required to make split
25
    - min_child_weight: float, default=1e-3 - Minimum sum of instance weight in a child
26
    - min_child_samples: int, default=20 - Minimum number of data needed in a child
27
    - subsample: float, default=1. - Subsample ratio of the training instance
28
    - subsample_freq: int, default=0 - Frequency of subsampling; <=0 disables subsampling
29
    - colsample_bytree: float, default=1. - Subsample ratio of columns when constructing each tree
30
    - reg_alpha: float, default=0. - L1 regularization term on weights
31
    - reg_lambda: float, default=0. - L2 regularization term on weights
32
    - random_state: int, RandomState object or None, default=None - Random number seed
33
    - n_jobs: int, default=None - Number of parallel threads
34
    - importance_type: str, default='split' - Feature importance type ('split', 'gain')
35
    """
36
    
37
    def fit(self, X, y, sample_weight=None, init_score=None, eval_set=None, 
38
            eval_names=None, eval_sample_weight=None, eval_init_score=None,
39
            eval_metric=None, feature_name='auto', categorical_feature='auto',
40
            callbacks=None,
41
            init_model=None):
42
        """
43
        Fit the gradient boosting model.
44
        
45
        Parameters:
46
        - X: array-like, shape=(n_samples, n_features) - Input features
47
        - y: array-like, shape=(n_samples,) - Target values
48
        - sample_weight: array-like, shape=(n_samples,), optional - Sample weights
49
        - init_score: array-like, shape=(n_samples,), optional - Initial prediction scores
50
        - eval_set: list of (X, y) tuples, optional - Evaluation datasets
51
        - eval_names: list of strings, optional - Names for evaluation datasets
52
        - eval_sample_weight: list of arrays, optional - Sample weights for evaluation sets
53
        - eval_init_score: list of arrays, optional - Initial scores for evaluation sets
54
        - eval_metric: str, list of str, or None, optional - Evaluation metrics
55
        - feature_name: list of strings or 'auto', optional - Feature names
56
        - categorical_feature: list of strings/ints or 'auto', optional - Categorical features
57
        - callbacks: list of callback functions, optional - Custom callbacks applied at each iteration
58
          (e.g. lgb.early_stopping(stopping_rounds=10), lgb.log_evaluation(period=10))
59
        - init_model: str, pathlib.Path, Booster, LGBMModel or None, optional - Model to continue training from
60
        Note: early_stopping_rounds and verbose were removed from fit() in LightGBM 4.0; use callbacks instead.
61
        
62
        Returns:
63
        - self: Returns self
64
        """
65
    
66
    def predict(self, X, num_iteration=None, **kwargs):
67
        """
68
        Make predictions on input data.
69
        
70
        Parameters:
71
        - X: array-like, shape=(n_samples, n_features) - Input features
72
        - num_iteration: int or None, optional - Limit number of iterations for prediction
73
        
74
        Returns:
75
        - array-like, shape=(n_samples,) - Predicted values
76
        """
77
    
78
    @property
79
    def booster_(self):
80
        """Get the underlying Booster object."""
81
    
82
    @property
83
    def feature_importances_(self):
84
        """Get feature importances array."""
85
    
86
    @property
87
    def feature_name_(self):
88
        """Get feature names list."""
89
    
90
    @property
91
    def n_features_(self):
92
        """Get number of features."""
93
    
94
    @property
95
    def objective_(self):
96
        """Get the concrete objective used by this model."""
97
```
98

99
### Regression
100

101
LightGBM regressor for continuous target variables. Optimized for regression tasks with support for various loss functions and evaluation metrics.
102

103
```python { .api }
104
class LGBMRegressor(LGBMModel):
105
    """
106
    LightGBM regressor for regression tasks.
107
    
108
    Additional parameters:
109
    - objective: str, default='regression' - Regression objective ('regression', 'regression_l1', 'huber', 'quantile', etc.)
110
    """
111
    
112
    def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,
113
                 learning_rate=0.1, n_estimators=100, subsample_for_bin=200000,
114
                 objective=None, class_weight=None, min_split_gain=0.,
115
                 min_child_weight=1e-3, min_child_samples=20, subsample=1.,
116
                 subsample_freq=0, colsample_bytree=1., reg_alpha=0.,
117
                 reg_lambda=0., random_state=None, n_jobs=None,
118
                 importance_type='split', **kwargs):
119
        """Initialize LGBMRegressor with regression-specific defaults."""
120
    
121
    def fit(self, X, y, **kwargs):
122
        """Fit regression model. Inherits from LGBMModel.fit()."""
123
    
124
    def predict(self, X, num_iteration=None, **kwargs):
125
        """
126
        Predict regression target for X.
127
        
128
        Returns:
129
        - array-like, shape=(n_samples,) - Predicted regression values
130
        """
131
    
132
    def score(self, X, y, sample_weight=None):
133
        """
134
        Return the coefficient of determination R^2 of the prediction.
135
        
136
        Parameters:
137
        - X: array-like, shape=(n_samples, n_features) - Test samples
138
        - y: array-like, shape=(n_samples,) - True values for X
139
        - sample_weight: array-like, shape=(n_samples,), optional - Sample weights
140
        
141
        Returns:
142
        - float: R^2 of self.predict(X) wrt. y
143
        """
144
```
145

146
### Classification
147

148
LightGBM classifier for discrete target variables. Supports both binary and multiclass classification with probability estimation and class prediction.
149

150
```python { .api }
151
class LGBMClassifier(LGBMModel):
152
    """
153
    LightGBM classifier for classification tasks.
154
    
155
    Additional parameters:
156
    - objective: str, default='binary' or 'multiclass' - Classification objective
157
    """
158
    
159
    def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,
160
                 learning_rate=0.1, n_estimators=100, subsample_for_bin=200000,
161
                 objective=None, class_weight=None, min_split_gain=0.,
162
                 min_child_weight=1e-3, min_child_samples=20, subsample=1.,
163
                 subsample_freq=0, colsample_bytree=1., reg_alpha=0.,
164
                 reg_lambda=0., random_state=None, n_jobs=None,
165
                 importance_type='split', **kwargs):
166
        """Initialize LGBMClassifier with classification-specific defaults."""
167
    
168
    def fit(self, X, y, **kwargs):
169
        """Fit classification model. Inherits from LGBMModel.fit()."""
170
    
171
    def predict(self, X, num_iteration=None, **kwargs):
172
        """
173
        Predict class labels for X.
174
        
175
        Returns:
176
        - array-like, shape=(n_samples,) - Predicted class labels
177
        """
178
    
179
    def predict_proba(self, X, num_iteration=None, **kwargs):
180
        """
181
        Predict class probabilities for X.
182
        
183
        Returns:
184
        - array-like, shape=(n_samples, n_classes) - Class probabilities
185
        """
186
    
187
    def score(self, X, y, sample_weight=None):
188
        """
189
        Return the mean accuracy on the given test data and labels.
190
        
191
        Parameters:
192
        - X: array-like, shape=(n_samples, n_features) - Test samples
193
        - y: array-like, shape=(n_samples,) - True labels for X
194
        - sample_weight: array-like, shape=(n_samples,), optional - Sample weights
195
        
196
        Returns:
197
        - float: Mean accuracy of self.predict(X) wrt. y
198
        """
199
    
200
    @property
201
    def classes_(self):
202
        """Get unique class labels."""
203
    
204
    @property
205
    def n_classes_(self):
206
        """Get number of classes."""
207
```
208

209
### Ranking
210

211
LightGBM ranker for learning-to-rank tasks. Optimized for ranking scenarios where the goal is to order items rather than predict absolute values.
212

213
```python { .api }
214
class LGBMRanker(LGBMModel):
215
    """
216
    LightGBM ranker for learning-to-rank tasks.
217
    
218
    Additional parameters:
219
    - objective: str, default='lambdarank' - Ranking objective ('lambdarank', 'rank_xendcg')
220
    """
221
    
222
    def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,
223
                 learning_rate=0.1, n_estimators=100, subsample_for_bin=200000,
224
                 objective=None, class_weight=None, min_split_gain=0.,
225
                 min_child_weight=1e-3, min_child_samples=20, subsample=1.,
226
                 subsample_freq=0, colsample_bytree=1., reg_alpha=0.,
227
                 reg_lambda=0., random_state=None, n_jobs=None,
228
                 importance_type='split', **kwargs):
229
        """Initialize LGBMRanker with ranking-specific defaults."""
230
    
231
    def fit(self, X, y, group=None, **kwargs):
232
        """
233
        Fit ranking model.
234
        
235
        Parameters:
236
        - X: array-like, shape=(n_samples, n_features) - Input features
237
        - y: array-like, shape=(n_samples,) - Target ranking scores
238
        - group: array-like, shape=(n_groups,) - Group/query sizes for ranking
239
        """
240
    
241
    def predict(self, X, num_iteration=None, **kwargs):
242
        """
243
        Predict ranking scores for X.
244
        
245
        Returns:
246
        - array-like, shape=(n_samples,) - Predicted ranking scores
247
        """
248
    
249
    def score(self, X, y, sample_weight=None):
250
        """
251
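        Return the ranking evaluation score. NOTE: LGBMRanker does not appear to inherit a scikit-learn scoring mixin, so confirm that score() exists in your installed version before relying on it.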
252
        
253
        Parameters:
254
        - X: array-like, shape=(n_samples, n_features) - Test samples
255
        - y: array-like, shape=(n_samples,) - True ranking scores for X
256
        - sample_weight: array-like, shape=(n_samples,), optional - Sample weights
257
        
258
        Returns:
259
        - float: Ranking evaluation score
260
        """
261
```
262

263
## Usage Examples
264

265
### Regression Example
266

267
```python
268
import lightgbm as lgb
269
from sklearn.datasets import load_diabetes
270
from sklearn.model_selection import train_test_split
271
from sklearn.metrics import mean_squared_error, r2_score
272

273
# Load data (load_boston was removed in scikit-learn 1.2; load_diabetes ships with scikit-learn)
274
X, y = load_diabetes(return_X_y=True)
275
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
276

277
# Initialize and train regressor
278
regressor = lgb.LGBMRegressor(
279
    objective='regression',
280
    n_estimators=100,
281
    learning_rate=0.1,
282
    num_leaves=31,
283
    random_state=42
284
)
285

286
regressor.fit(
287
    X_train, y_train,
288
    eval_set=[(X_test, y_test)],
289
    eval_metric='l2',
290
    callbacks=[lgb.early_stopping(stopping_rounds=10, verbose=False),
291
               lgb.log_evaluation(period=0)]
292
)
293

294
# Make predictions
295
predictions = regressor.predict(X_test)
296
print(f"R² Score: {r2_score(y_test, predictions):.4f}")
297
print(f"RMSE: {mean_squared_error(y_test, predictions) ** 0.5:.4f}")
298
```
299

300
### Classification Example
301

302
```python
303
import lightgbm as lgb
304
from sklearn.datasets import load_iris
305
from sklearn.model_selection import train_test_split
306
from sklearn.metrics import accuracy_score, classification_report
307

308
# Load data
309
X, y = load_iris(return_X_y=True)
310
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
311

312
# Initialize and train classifier
313
classifier = lgb.LGBMClassifier(
314
    objective='multiclass',
315
    n_estimators=100,
316
    learning_rate=0.1,
317
    num_leaves=31,
318
    random_state=42
319
)
320

321
classifier.fit(
322
    X_train, y_train,
323
    eval_set=[(X_test, y_test)],
324
    eval_metric='multi_logloss',
325
    callbacks=[lgb.early_stopping(stopping_rounds=10, verbose=False),
326
               lgb.log_evaluation(period=0)]
327
)
328

329
# Make predictions
330
predictions = classifier.predict(X_test)
331
probabilities = classifier.predict_proba(X_test)
332

333
print(f"Accuracy: {accuracy_score(y_test, predictions):.4f}")
334
print(f"Classes: {classifier.classes_}")
335
print(f"Feature Importances: {classifier.feature_importances_}")
336
```
337

338
### Ranking Example
339

340
```python
341
import lightgbm as lgb
342
import numpy as np
343
from sklearn.datasets import make_regression
344

345
# Create ranking data
346
X, y_cont = make_regression(n_samples=1000, n_features=10, random_state=42)
347
y = np.digitize(y_cont, np.quantile(y_cont, [0.25, 0.5, 0.75]))  # Integer relevance grades 0-3
348
group = np.full(20, 50)  # Query sizes: 20 queries of 50 samples each
349
assert group.sum() == len(X)  # Group sizes must sum to the number of samples
350

351
# Initialize and train ranker
352
ranker = lgb.LGBMRanker(
353
    objective='rank_xendcg',
354
    n_estimators=100,
355
    learning_rate=0.1,
356
    num_leaves=31,
357
    random_state=42
358
)
359

360
ranker.fit(X, y, group=group)
361

362
# Make predictions
363
ranking_scores = ranker.predict(X)
364
print(f"Ranking scores shape: {ranking_scores.shape}")
365
print(f"Sample ranking scores: {ranking_scores[:10]}")
366
```

Version

Tile

Files

sklearn-interface.md docs/

sklearn-interface.mddocs/