Tessl Tile for pypi/xgboost@3.0.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

callbacks.md configuration.md core-api.md distributed-computing.md index.md sklearn-interface.md visualization.md

docs/sklearn-interface.md

0
# Scikit-Learn Interface
1

2
XGBoost provides scikit-learn compatible estimators that follow sklearn conventions for seamless integration with existing ML pipelines. These estimators provide familiar fit/predict interfaces while leveraging XGBoost's powerful gradient boosting algorithms.
3

4
## Capabilities
5

6
### Base Model Class
7

8
Base class for all XGBoost sklearn-compatible estimators.
9

10
```python { .api }
11
class XGBModel:
12
    def __init__(
13
        self,
14
        n_estimators=100,
15
        max_depth=None,
16
        max_leaves=None,
17
        max_bin=None,
18
        grow_policy=None,
19
        learning_rate=None,
20
        verbosity=None,
21
        objective=None,
22
        booster=None,
23
        tree_method=None,
24
        n_jobs=None,
25
        gamma=None,
26
        min_child_weight=None,
27
        max_delta_step=None,
28
        subsample=None,
29
        sampling_method=None,
30
        colsample_bytree=None,
31
        colsample_bylevel=None,
32
        colsample_bynode=None,
33
        reg_alpha=None,
34
        reg_lambda=None,
35
        scale_pos_weight=None,
36
        base_score=None,
37
        random_state=None,
38
        missing=None,
39
        num_parallel_tree=None,
40
        monotone_constraints=None,
41
        interaction_constraints=None,
42
        importance_type='gain',
43
        device=None,
44
        validate_parameters=None,
45
        enable_categorical=False,
46
        feature_types=None,
47
        max_cat_to_onehot=None,
48
        max_cat_threshold=None,
49
        multi_strategy=None,
50
        eval_metric=None,
51
        early_stopping_rounds=None,
52
        callbacks=None,
53
        **kwargs
54
    ):
55
        """
56
        Base XGBoost sklearn-compatible estimator.
57

58
        Parameters:
59
        - n_estimators: Number of boosting rounds
60
        - max_depth: Maximum tree depth
61
        - learning_rate: Boosting learning rate
62
        - objective: Learning objective
63
        - booster: Booster type ('gbtree', 'gblinear', 'dart')
64
        - tree_method: Tree construction algorithm
65
        - n_jobs: Number of parallel threads
66
        - gamma: Minimum loss reduction for split
67
        - min_child_weight: Minimum sum of instance weight in child
68
        - subsample: Subsample ratio of training instances
69
        - colsample_bytree: Subsample ratio of columns per tree
70
        - reg_alpha: L1 regularization term
71
        - reg_lambda: L2 regularization term
72
        - random_state: Random seed
73
        - enable_categorical: Enable categorical feature support
74
        """
75

76
    def fit(
77
        self,
78
        X,
79
        y,
80
        sample_weight=None,
81
        base_margin=None,
82
        eval_set=None,
83
        eval_metric=None,
84
        early_stopping_rounds=None,
85
        verbose=True,
86
        xgb_model=None,
87
        sample_weight_eval_set=None,
88
        base_margin_eval_set=None,
89
        feature_weights=None,
90
        callbacks=None
91
    ):
92
        """
93
        Fit the model to training data.
94

95
        Parameters:
96
        - X: Training features
97
        - y: Training labels
98
        - sample_weight: Sample weights
99
        - base_margin: Base margin for each sample
100
        - eval_set: Evaluation sets as list of (X, y) tuples
101
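        - eval_metric: Evaluation metric(s) (legacy fit() argument; in XGBoost 3.0 set this on the estimator constructor instead)
102
        - early_stopping_rounds: Early stopping rounds (legacy fit() argument; in XGBoost 3.0 set this on the estimator constructor instead)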
103
        - verbose: Verbosity
104
        - xgb_model: Existing model to continue training
105
        - sample_weight_eval_set: Sample weights for eval sets
106
        - base_margin_eval_set: Base margins for eval sets
107
        - feature_weights: Feature weights
108
        - callbacks: Callback functions (legacy fit() argument; in XGBoost 3.0 set this on the estimator constructor instead)
109

110
        Returns:
111
        Self
112
        """
113

114
    def predict(
115
        self,
116
        X,
117
        output_margin=False,
118
        validate_features=True,
119
        base_margin=None,
120
        iteration_range=None
121
    ):
122
        """
123
        Make predictions on input data.
124

125
        Parameters:
126
        - X: Input features
127
        - output_margin: Output raw margins
128
        - validate_features: Validate feature names/types
129
        - base_margin: Base margin for each sample
130
        - iteration_range: Range of boosting rounds
131

132
        Returns:
133
        Predictions as numpy array
134
        """
135

136
    def get_booster(self):
137
        """Get underlying Booster object."""
138

139
    def save_model(self, fname):
140
        """Save model to file."""
141

142
    def load_model(self, fname):
143
        """Load model from file."""
144

145
    @property
146
    def feature_importances_(self):
147
        """Feature importances as numpy array."""
148

149
    def get_params(self, deep=True):
150
        """Get estimator parameters."""
151

152
    def set_params(self, **params):
153
        """Set estimator parameters."""
154
```
155

156
### Regression
157

158
XGBoost regressor for continuous target variables.
159

160
```python { .api }
161
class XGBRegressor(XGBModel):
162
    def __init__(self, **kwargs):
163
        """
164
        XGBoost regressor.
165
        
166
        Inherits all parameters from XGBModel.
167
        Default objective: 'reg:squarederror'
168
        """
169

170
    def fit(self, X, y, **kwargs):
171
        """Fit regressor to training data."""
172

173
    def predict(self, X, **kwargs):
174
        """Predict continuous values."""
175
```
176

177
### Classification
178

179
XGBoost classifier for categorical target variables.
180

181
```python { .api }
182
class XGBClassifier(XGBModel):
183
    def __init__(self, **kwargs):
184
        """
185
        XGBoost classifier.
186
        
187
        Inherits all parameters from XGBModel.
188
        Default objective: 'binary:logistic' or 'multi:softprob'
189
        """
190

191
    def fit(self, X, y, **kwargs):
192
        """Fit classifier to training data."""
193

194
    def predict(self, X, **kwargs):
195
        """Predict class labels."""
196

197
    def predict_proba(
198
        self,
199
        X,
200
        validate_features=True,
201
        base_margin=None,
202
        iteration_range=None
203
    ):
204
        """
205
        Predict class probabilities.
206

207
        Parameters:
208
        - X: Input features
209
        - validate_features: Validate feature names/types
210
        - base_margin: Base margin for each sample
211
        - iteration_range: Range of boosting rounds
212

213
        Returns:
214
        Class probabilities as numpy array
215
        """
216

217
    def predict_log_proba(self, X, **kwargs):
218
        """Predict log class probabilities."""
219

220
    @property
221
    def classes_(self):
222
        """Unique class labels."""
223
```
224

225
### Ranking
226

227
XGBoost ranker for learning-to-rank problems.
228

229
```python { .api }
230
class XGBRanker(XGBModel):
231
    def __init__(self, **kwargs):
232
        """
233
        XGBoost ranker for learning-to-rank.
234
        
235
        Inherits all parameters from XGBModel.
236
        Default objective: 'rank:ndcg'
237
        """
238

239
    def fit(
240
        self,
241
        X,
242
        y,
243
        group=None,
244
        qid=None,
245
        sample_weight=None,
246
        base_margin=None,
247
        eval_set=None,
248
        eval_group=None,
249
        eval_qid=None,
250
        eval_metric=None,
251
        early_stopping_rounds=None,
252
        verbose=True,
253
        xgb_model=None,
254
        sample_weight_eval_set=None,
255
        base_margin_eval_set=None,
256
        feature_weights=None,
257
        callbacks=None
258
    ):
259
        """
260
        Fit ranker to training data.
261

262
        Parameters:
263
        - X: Training features
264
        - y: Training relevance scores
265
        - group: Group sizes for queries
266
        - qid: Query IDs for each sample
267
        - (other parameters same as XGBModel.fit)
268

269
        Returns:
270
        Self
271
        """
272

273
    def predict(self, X, **kwargs):
274
        """Predict ranking scores."""
275
```
276

277
### Random Forest Variants
278

279
XGBoost implementations of random forest algorithms.
280

281
```python { .api }
282
class XGBRFRegressor(XGBRegressor):
283
    def __init__(self, **kwargs):
284
        """
285
        XGBoost random forest regressor.
286
        
287
        Configured with random forest defaults:
288
        - colsample_bynode=0.8
289
        - learning_rate=1.0
290
        - max_depth=None
291
        - n_estimators=100
292
        - num_parallel_tree=100
293
        - reg_lambda=1e-5
294
        - subsample=0.8
295
        """
296

297
    def fit(self, X, y, **kwargs):
298
        """Fit random forest regressor."""
299

300
    def predict(self, X, **kwargs):
301
        """Predict using random forest."""
302

303
class XGBRFClassifier(XGBClassifier):
304
    def __init__(self, **kwargs):
305
        """
306
        XGBoost random forest classifier.
307
        
308
        Same defaults as XGBRFRegressor with classification objective.
309
        """
310

311
    def fit(self, X, y, **kwargs):
312
        """Fit random forest classifier."""
313

314
    def predict(self, X, **kwargs):
315
        """Predict class labels using random forest."""
316

317
    def predict_proba(self, X, **kwargs):
318
        """Predict class probabilities using random forest."""
319
```
320

321
## Usage Examples
322

323
### Basic Classification
324

325
```python
326
from xgboost import XGBClassifier
327
from sklearn.datasets import load_iris
328
from sklearn.model_selection import train_test_split
329

330
# Load data
331
X, y = load_iris(return_X_y=True)
332
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
333

334
# Train classifier
335
clf = XGBClassifier(n_estimators=100, max_depth=3, learning_rate=0.1)
336
clf.fit(X_train, y_train)
337

338
# Make predictions
339
y_pred = clf.predict(X_test)
340
y_proba = clf.predict_proba(X_test)
341

342
# Feature importance
343
importance = clf.feature_importances_
344
```
345

346
### Regression with Early Stopping
347

348
```python
349
from xgboost import XGBRegressor
350
from sklearn.datasets import load_diabetes
351
from sklearn.model_selection import train_test_split
352

353
# Load data
354
X, y = load_diabetes(return_X_y=True)
355
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
356

357
# Train with early stopping
358
reg = XGBRegressor(
359
    n_estimators=1000,
360
    max_depth=3,
361
    learning_rate=0.1,
362
    early_stopping_rounds=10
363
)
364

365
reg.fit(
366
    X_train, y_train,
367
    eval_set=[(X_test, y_test)],
368
    verbose=False
369
)
370

371
# Predict
372
y_pred = reg.predict(X_test)
373
```

Version

Tile

Files

docs/sklearn-interface.md

docs/sklearn-interface.md