or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

callbacks.md, configuration.md, core-api.md, distributed-computing.md, index.md, sklearn-interface.md, visualization.md

docs/sklearn-interface.md

0

# Scikit-Learn Interface

1

2

XGBoost provides scikit-learn compatible estimators that follow sklearn conventions for seamless integration with existing ML pipelines. These estimators provide familiar fit/predict interfaces while leveraging XGBoost's powerful gradient boosting algorithms.

3

4

## Capabilities

5

6

### Base Model Class

7

8

Base class for all XGBoost sklearn-compatible estimators.

9

10

```python { .api }

11

class XGBModel:

12

def __init__(

13

self,

14

n_estimators=100,

15

max_depth=None,

16

max_leaves=None,

17

max_bin=None,

18

grow_policy=None,

19

learning_rate=None,

20

verbosity=None,

21

objective=None,

22

booster=None,

23

tree_method=None,

24

n_jobs=None,

25

gamma=None,

26

min_child_weight=None,

27

max_delta_step=None,

28

subsample=None,

29

sampling_method=None,

30

colsample_bytree=None,

31

colsample_bylevel=None,

32

colsample_bynode=None,

33

reg_alpha=None,

34

reg_lambda=None,

35

scale_pos_weight=None,

36

base_score=None,

37

random_state=None,

38

missing=None,

39

num_parallel_tree=None,

40

monotone_constraints=None,

41

interaction_constraints=None,

42

importance_type='gain',

43

device=None,

44

validate_parameters=None,

45

enable_categorical=False,

46

feature_types=None,

47

max_cat_to_onehot=None,

48

max_cat_threshold=None,

49

multi_strategy=None,

50

eval_metric=None,

51

early_stopping_rounds=None,

52

callbacks=None,

53

**kwargs

54

):

55

"""

56

Base XGBoost sklearn-compatible estimator.

57

58

Parameters:

59

- n_estimators: Number of boosting rounds

60

- max_depth: Maximum tree depth

61

- learning_rate: Boosting learning rate

62

- objective: Learning objective

63

- booster: Booster type ('gbtree', 'gblinear', 'dart')

64

- tree_method: Tree construction algorithm

65

- n_jobs: Number of parallel threads

66

- gamma: Minimum loss reduction for split

67

- min_child_weight: Minimum sum of instance weight in child

68

- subsample: Subsample ratio of training instances

69

- colsample_bytree: Subsample ratio of columns per tree

70

- reg_alpha: L1 regularization term

71

- reg_lambda: L2 regularization term

72

- random_state: Random seed

73

- enable_categorical: Enable categorical feature support

74

"""

75

76

def fit(

77

self,

78

X,

79

y,

80

sample_weight=None,

81

base_margin=None,

82

eval_set=None,

83

eval_metric=None,

84

early_stopping_rounds=None,

85

verbose=True,

86

xgb_model=None,

87

sample_weight_eval_set=None,

88

base_margin_eval_set=None,

89

feature_weights=None,

90

callbacks=None

91

):

92

"""

93

Fit the model to training data.

94

95

Parameters:

96

- X: Training features

97

- y: Training labels

98

- sample_weight: Sample weights

99

- base_margin: Base margin for each sample

100

- eval_set: Evaluation sets as list of (X, y) tuples

101

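- eval_metric: Evaluation metric(s) (deprecated in fit() since XGBoost 1.6; prefer the constructor argument)

102

- early_stopping_rounds: Early stopping rounds (deprecated in fit() since XGBoost 1.6; prefer the constructor argument)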

103

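- verbose: If True, print evaluation metrics for eval_set at every boosting round; if an int, print every `verbose` rounds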

104

- xgb_model: Existing model to continue training

105

- sample_weight_eval_set: Sample weights for eval sets

106

- base_margin_eval_set: Base margins for eval sets

107

- feature_weights: Feature weights

108

- callbacks: Callback functions (deprecated in fit() since XGBoost 1.6; prefer the constructor argument)

109

110

Returns:

111

Self

112

"""

113

114

def predict(

115

self,

116

X,

117

output_margin=False,

118

validate_features=True,

119

base_margin=None,

120

iteration_range=None

121

):

122

"""

123

Make predictions on input data.

124

125

Parameters:

126

- X: Input features

127

- output_margin: Output raw margins

128

- validate_features: Validate feature names/types

129

- base_margin: Base margin for each sample

130

- iteration_range: Range of boosting rounds

131

132

Returns:

133

Predictions as numpy array

134

"""

135

136

def get_booster(self):

137

"""Get underlying Booster object."""

138

139

def save_model(self, fname):

140

"""Save model to file."""

141

142

def load_model(self, fname):

143

"""Load model from file."""

144

145

@property

146

def feature_importances_(self):

147

"""Feature importances as numpy array."""

148

149

def get_params(self, deep=True):

150

"""Get estimator parameters."""

151

152

def set_params(self, **params):

153

"""Set estimator parameters."""

154

```

155

156

### Regression

157

158

XGBoost regressor for continuous target variables.

159

160

```python { .api }

161

class XGBRegressor(XGBModel):

162

def __init__(self, **kwargs):

163

"""

164

XGBoost regressor.

165

166

Inherits all parameters from XGBModel.

167

Default objective: 'reg:squarederror'

168

"""

169

170

def fit(self, X, y, **kwargs):

171

"""Fit regressor to training data."""

172

173

def predict(self, X, **kwargs):

174

"""Predict continuous values."""

175

```

176

177

### Classification

178

179

XGBoost classifier for categorical target variables.

180

181

```python { .api }

182

class XGBClassifier(XGBModel):

183

def __init__(self, **kwargs):

184

"""

185

XGBoost classifier.

186

187

Inherits all parameters from XGBModel.

188

Default objective: 'binary:logistic' or 'multi:softprob'

189

"""

190

191

def fit(self, X, y, **kwargs):

192

"""Fit classifier to training data."""

193

194

def predict(self, X, **kwargs):

195

"""Predict class labels."""

196

197

def predict_proba(

198

self,

199

X,

200

validate_features=True,

201

base_margin=None,

202

iteration_range=None

203

):

204

"""

205

Predict class probabilities.

206

207

Parameters:

208

- X: Input features

209

- validate_features: Validate feature names/types

210

- base_margin: Base margin for each sample

211

- iteration_range: Range of boosting rounds

212

213

Returns:

214

Class probabilities as numpy array

215

"""

216

217

def predict_log_proba(self, X, **kwargs):

218

"""Predict log class probabilities."""

219

220

@property

221

def classes_(self):

222

"""Unique class labels."""

223

```

224

225

### Ranking

226

227

XGBoost ranker for learning-to-rank problems.

228

229

```python { .api }

230

class XGBRanker(XGBModel):

231

def __init__(self, **kwargs):

232

"""

233

XGBoost ranker for learning-to-rank.

234

235

Inherits all parameters from XGBModel.

236

Default objective: 'rank:ndcg' (XGBoost >= 2.0; earlier releases defaulted to 'rank:pairwise')

237

"""

238

239

def fit(

240

self,

241

X,

242

y,

243

group=None,

244

qid=None,

245

sample_weight=None,

246

base_margin=None,

247

eval_set=None,

248

eval_group=None,

249

eval_qid=None,

250

eval_metric=None,

251

early_stopping_rounds=None,

252

verbose=True,

253

xgb_model=None,

254

sample_weight_eval_set=None,

255

base_margin_eval_set=None,

256

feature_weights=None,

257

callbacks=None

258

):

259

"""

260

Fit ranker to training data.

261

262

Parameters:

263

- X: Training features

264

- y: Training relevance scores

265

- group: Group sizes for queries

266

- qid: Query IDs for each sample

267

- (other parameters same as XGBModel.fit)

268

269

Returns:

270

Self

271

"""

272

273

def predict(self, X, **kwargs):

274

"""Predict ranking scores."""

275

```

276

277

### Random Forest Variants

278

279

XGBoost implementations of random forest algorithms.

280

281

```python { .api }

282

class XGBRFRegressor(XGBRegressor):

283

def __init__(self, **kwargs):

284

"""

285

XGBoost random forest regressor.

286

287

Configured with random forest defaults:

288

- colsample_bynode=0.8

289

- learning_rate=1.0

290

- max_depth=None

291

- n_estimators=100

292

- num_parallel_tree=100

293

- reg_lambda=1e-5

294

- subsample=0.8

295

"""

296

297

def fit(self, X, y, **kwargs):

298

"""Fit random forest regressor."""

299

300

def predict(self, X, **kwargs):

301

"""Predict using random forest."""

302

303

class XGBRFClassifier(XGBClassifier):

304

def __init__(self, **kwargs):

305

"""

306

XGBoost random forest classifier.

307

308

Same defaults as XGBRFRegressor with classification objective.

309

"""

310

311

def fit(self, X, y, **kwargs):

312

"""Fit random forest classifier."""

313

314

def predict(self, X, **kwargs):

315

"""Predict class labels using random forest."""

316

317

def predict_proba(self, X, **kwargs):

318

"""Predict class probabilities using random forest."""

319

```

320

321

## Usage Examples

322

323

### Basic Classification

324

325

```python

326

from xgboost import XGBClassifier

327

from sklearn.datasets import load_iris

328

from sklearn.model_selection import train_test_split

329

330

# Load data

331

X, y = load_iris(return_X_y=True)

332

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

333

334

# Train classifier

335

clf = XGBClassifier(n_estimators=100, max_depth=3, learning_rate=0.1)

336

clf.fit(X_train, y_train)

337

338

# Make predictions

339

y_pred = clf.predict(X_test)

340

y_proba = clf.predict_proba(X_test)

341

342

# Feature importance

343

importance = clf.feature_importances_

344

```

345

346

### Regression with Early Stopping

347

348

```python

349

from xgboost import XGBRegressor

350

from sklearn.datasets import load_diabetes

351

from sklearn.model_selection import train_test_split

352

353

# Load data

354

X, y = load_diabetes(return_X_y=True)

355

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

356

357

# Train with early stopping

358

reg = XGBRegressor(

359

n_estimators=1000,

360

max_depth=3,

361

learning_rate=0.1,

362

early_stopping_rounds=10

363

)

364

365

reg.fit(

366

X_train, y_train,

367

eval_set=[(X_test, y_test)],

368

verbose=False

369

)

370

371

# Predict

372

y_pred = reg.predict(X_test)

373

```