or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-training.md distributed-computing.md index.md sklearn-interface.md training-callbacks.md visualization.md

sklearn-interface.mddocs/

0

# Scikit-learn Interface

1

2

High-level, sklearn-compatible interface for gradient boosting tasks. These classes provide familiar scikit-learn APIs with automatic hyperparameter handling, data preprocessing, and integration with the broader sklearn ecosystem.

3

4

## Capabilities

5

6

### Base Model Interface

7

8

The foundational class that provides common functionality for all LightGBM sklearn-style estimators.

9

10

```python { .api }

11

class LGBMModel:

12

"""

13

Base class for LightGBM sklearn-style estimators.

14

15

Common parameters:

16

- boosting_type: str, default='gbdt' - Type of boosting ('gbdt', 'dart', 'goss', 'rf')

17

- num_leaves: int, default=31 - Maximum tree leaves for base learners

18

- max_depth: int, default=-1 - Maximum tree depth for base learners (-1 means no limit)

19

- learning_rate: float, default=0.1 - Boosting learning rate

20

- n_estimators: int, default=100 - Number of boosted trees to fit

21

- subsample_for_bin: int, default=200000 - Number of samples for constructing bins

22

- objective: str or callable, default=None - Specify the learning task and loss function

23

- class_weight: dict, 'balanced' or None, default=None - Weights associated with classes

24

- min_split_gain: float, default=0. - Minimum loss reduction required to make split

25

- min_child_weight: float, default=1e-3 - Minimum sum of instance weight in a child

26

- min_child_samples: int, default=20 - Minimum number of data needed in a child

27

- subsample: float, default=1. - Subsample ratio of the training instance

28

- subsample_freq: int, default=0 - Frequency of subsampling; <=0 disables it

29

- colsample_bytree: float, default=1. - Subsample ratio of columns when constructing each tree

30

- reg_alpha: float, default=0. - L1 regularization term on weights

31

- reg_lambda: float, default=0. - L2 regularization term on weights

32

- random_state: int, RandomState object or None, default=None - Random number seed

33

- n_jobs: int, default=None - Number of parallel threads

34

- importance_type: str, default='split' - Feature importance type ('split', 'gain')

35

"""

36

37

def fit(self, X, y, sample_weight=None, init_score=None, eval_set=None,

38

eval_names=None, eval_sample_weight=None, eval_init_score=None,

39

eval_metric=None, feature_name='auto', categorical_feature='auto',

40

early_stopping_rounds=None, verbose=True, log_evaluation=None,

41

callbacks=None):

42

"""

43

Fit the gradient boosting model.

44

45

Parameters:

46

- X: array-like, shape=(n_samples, n_features) - Input features

47

- y: array-like, shape=(n_samples,) - Target values

48

- sample_weight: array-like, shape=(n_samples,), optional - Sample weights

49

- init_score: array-like, shape=(n_samples,), optional - Initial prediction scores

50

- eval_set: list of (X, y) tuples, optional - Evaluation datasets

51

- eval_names: list of strings, optional - Names for evaluation datasets

52

- eval_sample_weight: list of arrays, optional - Sample weights for evaluation sets

53

- eval_init_score: list of arrays, optional - Initial scores for evaluation sets

54

- eval_metric: str, list of str, or None, optional - Evaluation metrics

55

- feature_name: list of strings or 'auto', optional - Feature names

56

- categorical_feature: list of strings/ints or 'auto', optional - Categorical features

57

- early_stopping_rounds: int or None, optional - Early stopping rounds (removed in LightGBM 4.0; pass callbacks=[lightgbm.early_stopping(...)] instead)

58

- verbose: bool or int, optional - Controls verbosity of training (removed in LightGBM 4.0; pass callbacks=[lightgbm.log_evaluation(...)] instead)

59

- log_evaluation: bool, int, or None, optional - Evaluation logging frequency

60

- callbacks: list of callback functions, optional - Custom callbacks

61

62

Returns:

63

- self: Returns self

64

"""

65

66

def predict(self, X, num_iteration=None, **kwargs):

67

"""

68

Make predictions on input data.

69

70

Parameters:

71

- X: array-like, shape=(n_samples, n_features) - Input features

72

- num_iteration: int or None, optional - Limit number of iterations for prediction

73

74

Returns:

75

- array-like, shape=(n_samples,) - Predicted values

76

"""

77

78

@property

79

def booster_(self):

80

"""Get the underlying Booster object."""

81

82

@property

83

def feature_importances_(self):

84

"""Get feature importances array."""

85

86

@property

87

def feature_name_(self):

88

"""Get feature names list."""

89

90

@property

91

def n_features_(self):

92

"""Get number of features."""

93

94

@property

95

def objective_(self):

96

"""Get the concrete objective used by this model."""

97

```

98

99

### Regression

100

101

LightGBM regressor for continuous target variables. Optimized for regression tasks with support for various loss functions and evaluation metrics.

102

103

```python { .api }

104

class LGBMRegressor(LGBMModel):

105

"""

106

LightGBM regressor for regression tasks.

107

108

Additional parameters:

109

- objective: str, default='regression' - Regression objective ('regression', 'regression_l1', 'huber', 'quantile', etc.)

110

"""

111

112

def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,

113

learning_rate=0.1, n_estimators=100, subsample_for_bin=200000,

114

objective=None, class_weight=None, min_split_gain=0.,

115

min_child_weight=1e-3, min_child_samples=20, subsample=1.,

116

subsample_freq=0, colsample_bytree=1., reg_alpha=0.,

117

reg_lambda=0., random_state=None, n_jobs=None,

118

importance_type='split', **kwargs):

119

"""Initialize LGBMRegressor with regression-specific defaults."""

120

121

def fit(self, X, y, **kwargs):

122

"""Fit regression model. Inherits from LGBMModel.fit()."""

123

124

def predict(self, X, num_iteration=None, **kwargs):

125

"""

126

Predict regression target for X.

127

128

Returns:

129

- array-like, shape=(n_samples,) - Predicted regression values

130

"""

131

132

def score(self, X, y, sample_weight=None):

133

"""

134

Return the coefficient of determination R^2 of the prediction.

135

136

Parameters:

137

- X: array-like, shape=(n_samples, n_features) - Test samples

138

- y: array-like, shape=(n_samples,) - True values for X

139

- sample_weight: array-like, shape=(n_samples,), optional - Sample weights

140

141

Returns:

142

- float: R^2 of self.predict(X) wrt. y

143

"""

144

```

145

146

### Classification

147

148

LightGBM classifier for discrete target variables. Supports both binary and multiclass classification with probability estimation and class prediction.

149

150

```python { .api }

151

class LGBMClassifier(LGBMModel):

152

"""

153

LightGBM classifier for classification tasks.

154

155

Additional parameters:

156

- objective: str, default='binary' or 'multiclass' - Classification objective

157

"""

158

159

def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,

160

learning_rate=0.1, n_estimators=100, subsample_for_bin=200000,

161

objective=None, class_weight=None, min_split_gain=0.,

162

min_child_weight=1e-3, min_child_samples=20, subsample=1.,

163

subsample_freq=0, colsample_bytree=1., reg_alpha=0.,

164

reg_lambda=0., random_state=None, n_jobs=None,

165

importance_type='split', **kwargs):

166

"""Initialize LGBMClassifier with classification-specific defaults."""

167

168

def fit(self, X, y, **kwargs):

169

"""Fit classification model. Inherits from LGBMModel.fit()."""

170

171

def predict(self, X, num_iteration=None, **kwargs):

172

"""

173

Predict class labels for X.

174

175

Returns:

176

- array-like, shape=(n_samples,) - Predicted class labels

177

"""

178

179

def predict_proba(self, X, num_iteration=None, **kwargs):

180

"""

181

Predict class probabilities for X.

182

183

Returns:

184

- array-like, shape=(n_samples, n_classes) - Class probabilities

185

"""

186

187

def score(self, X, y, sample_weight=None):

188

"""

189

Return the mean accuracy on the given test data and labels.

190

191

Parameters:

192

- X: array-like, shape=(n_samples, n_features) - Test samples

193

- y: array-like, shape=(n_samples,) - True labels for X

194

- sample_weight: array-like, shape=(n_samples,), optional - Sample weights

195

196

Returns:

197

- float: Mean accuracy of self.predict(X) wrt. y

198

"""

199

200

@property

201

def classes_(self):

202

"""Get unique class labels."""

203

204

@property

205

def n_classes_(self):

206

"""Get number of classes."""

207

```

208

209

### Ranking

210

211

LightGBM ranker for learning-to-rank tasks. Optimized for ranking scenarios where the goal is to order items rather than predict absolute values.

212

213

```python { .api }

214

class LGBMRanker(LGBMModel):

215

"""

216

LightGBM ranker for learning-to-rank tasks.

217

218

Additional parameters:

219

- objective: str, default='lambdarank' - Ranking objective ('lambdarank', 'rank_xendcg')

220

"""

221

222

def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,

223

learning_rate=0.1, n_estimators=100, subsample_for_bin=200000,

224

objective=None, class_weight=None, min_split_gain=0.,

225

min_child_weight=1e-3, min_child_samples=20, subsample=1.,

226

subsample_freq=0, colsample_bytree=1., reg_alpha=0.,

227

reg_lambda=0., random_state=None, n_jobs=None,

228

importance_type='split', **kwargs):

229

"""Initialize LGBMRanker with ranking-specific defaults."""

230

231

def fit(self, X, y, group=None, **kwargs):

232

"""

233

Fit ranking model.

234

235

Parameters:

236

- X: array-like, shape=(n_samples, n_features) - Input features

237

- y: array-like, shape=(n_samples,) - Target ranking scores

238

- group: array-like, shape=(n_groups,) - Group/query sizes for ranking

239

"""

240

241

def predict(self, X, num_iteration=None, **kwargs):

242

"""

243

Predict ranking scores for X.

244

245

Returns:

246

- array-like, shape=(n_samples,) - Predicted ranking scores

247

"""

248

249

def score(self, X, y, sample_weight=None):

250

"""

251

Return the ranking evaluation score.

252

253

Parameters:

254

- X: array-like, shape=(n_samples, n_features) - Test samples

255

- y: array-like, shape=(n_samples,) - True ranking scores for X

256

- sample_weight: array-like, shape=(n_samples,), optional - Sample weights

257

258

Returns:

259

- float: Ranking evaluation score

260

"""

261

```

262

263

## Usage Examples

264

265

### Regression Example

266

267

```python

268

import lightgbm as lgb
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load data. load_boston was removed in scikit-learn 1.2, so this example
# uses the bundled diabetes regression dataset instead.
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train regressor
regressor = lgb.LGBMRegressor(
    objective='regression',
    n_estimators=100,
    learning_rate=0.1,
    num_leaves=31,
    random_state=42,
)

# Early stopping is requested through a callback: the early_stopping_rounds=
# and verbose= arguments of fit() were removed in LightGBM 4.0.
regressor.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    eval_metric='l2',
    callbacks=[lgb.early_stopping(stopping_rounds=10, verbose=False)],
)

# Make predictions
predictions = regressor.predict(X_test)
print(f"R² Score: {r2_score(y_test, predictions):.4f}")
# Take the square root explicitly; mean_squared_error(squared=False) is
# deprecated in recent scikit-learn releases.
print(f"RMSE: {mean_squared_error(y_test, predictions) ** 0.5:.4f}")

298

```

299

300

### Classification Example

301

302

```python

303

import lightgbm as lgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load data
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train classifier
classifier = lgb.LGBMClassifier(
    objective='multiclass',
    n_estimators=100,
    learning_rate=0.1,
    num_leaves=31,
    random_state=42,
)

# Early stopping is requested through a callback: the early_stopping_rounds=
# and verbose= arguments of fit() were removed in LightGBM 4.0.
classifier.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    eval_metric='multi_logloss',
    callbacks=[lgb.early_stopping(stopping_rounds=10, verbose=False)],
)

# Make predictions
predictions = classifier.predict(X_test)
probabilities = classifier.predict_proba(X_test)

print(f"Accuracy: {accuracy_score(y_test, predictions):.4f}")
print(f"Classes: {classifier.classes_}")
print(f"Feature Importances: {classifier.feature_importances_}")

336

```

337

338

### Ranking Example

339

340

```python

341

import lightgbm as lgb
import numpy as np
from sklearn.datasets import make_regression

# Create ranking data
X, scores = make_regression(n_samples=1000, n_features=10, random_state=42)

# Ranking objectives expect non-negative integer relevance labels, so bucket
# the continuous scores into five grades (0 = least relevant, 4 = most).
y = np.digitize(scores, np.quantile(scores, [0.2, 0.4, 0.6, 0.8]))

# Create groups for ranking (query sizes). The sizes must add up to the
# number of samples: 20 queries of 50 documents each.
group = np.full(20, 50)

# Initialize and train ranker
ranker = lgb.LGBMRanker(
    objective='rank_xendcg',
    n_estimators=100,
    learning_rate=0.1,
    num_leaves=31,
    random_state=42,
)

ranker.fit(X, y, group=group)

# Make predictions
ranking_scores = ranker.predict(X)
print(f"Ranking scores shape: {ranking_scores.shape}")
print(f"Sample ranking scores: {ranking_scores[:10]}")

366

```