# Visualization

CatBoost provides interactive visualization components specifically designed for Jupyter notebooks, along with compatibility layers for XGBoost and LightGBM plotting workflows. These tools enable real-time monitoring of training progress and model analysis.

## Capabilities

### Interactive Jupyter Widgets

Visualization components that integrate seamlessly with Jupyter notebook environments.

```python { .api }
class MetricVisualizer:
    """
    Interactive widget for visualizing training metrics in Jupyter notebooks.

    Provides real-time plots of training and validation metrics during model training,
    with interactive controls for zooming, filtering, and metric selection.
    """

    def __init__(self, train_dirs=None, subdirs=None):
        """
        Initialize MetricVisualizer widget.

        Parameters:
        - train_dirs: List of training directories to monitor (list of strings)
        - subdirs: Subdirectories within train_dirs to include (list of strings)
        """

    def start(self, train_dirs=None, subdirs=None):
        """
        Start the metric visualization widget.

        Parameters:
        - train_dirs: Training directories to visualize (list of strings)
        - subdirs: Subdirectories to include (list of strings)

        Returns:
        Interactive Jupyter widget displaying training metrics
        """

    def stop(self):
        """Stop the metric visualization widget."""

class MetricsPlotter:
    """
    Utility class for plotting training metrics with matplotlib integration.

    Provides static and dynamic plotting capabilities for CatBoost training metrics,
    with customizable styling and export options.
    """

    def __init__(self, train_dir=None):
        """
        Initialize MetricsPlotter.

        Parameters:
        - train_dir: Training directory containing metric logs (string)
        """

    def plot_metrics(self, metrics=None, train_dir=None, figsize=(12, 8),
                     title=None, save_path=None):
        """
        Plot training metrics from log files.

        Parameters:
        - metrics: List of metrics to plot (list of strings)
        - train_dir: Directory containing training logs (string)
        - figsize: Figure size for matplotlib (tuple)
        - title: Plot title (string)
        - save_path: Path to save plot image (string)

        Returns:
        matplotlib.figure.Figure: Generated plot figure
        """

    def plot_feature_importance(self, model, feature_names=None,
                                max_features=20, figsize=(10, 8),
                                title="Feature Importance", save_path=None):
        """
        Plot feature importance from trained model.

        Parameters:
        - model: Trained CatBoost model
        - feature_names: Feature names for labeling (list of strings)
        - max_features: Maximum number of features to show (int)
        - figsize: Figure size for matplotlib (tuple)
        - title: Plot title (string)
        - save_path: Path to save plot (string)

        Returns:
        matplotlib.figure.Figure: Feature importance plot
        """

    def plot_learning_curve(self, train_scores, val_scores=None,
                            metric_name="Loss", figsize=(10, 6),
                            title="Learning Curve", save_path=None):
        """
        Plot learning curves for training and validation.

        Parameters:
        - train_scores: Training metric scores (array-like)
        - val_scores: Validation metric scores (array-like, optional)
        - metric_name: Name of the metric being plotted (string)
        - figsize: Figure size for matplotlib (tuple)
        - title: Plot title (string)
        - save_path: Path to save plot (string)

        Returns:
        matplotlib.figure.Figure: Learning curve plot
        """
```
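
The `plot_learning_curve` method is not exercised in the examples below, so here is a minimal sketch based on the signature documented above. The score lists are placeholder values; in practice they could come from `model.get_evals_result()` after fitting with an `eval_set`.

```python
from catboost.widget import MetricsPlotter

# Placeholder per-iteration scores; real values could be pulled from
# model.get_evals_result() after training with an eval_set.
train_scores = [0.69, 0.52, 0.41, 0.35, 0.31]
val_scores = [0.70, 0.55, 0.46, 0.42, 0.41]

plotter = MetricsPlotter()
fig = plotter.plot_learning_curve(
    train_scores=train_scores,
    val_scores=val_scores,
    metric_name='Logloss',
    title='Learning Curve',
    save_path='learning_curve.png'  # optional
)
```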

### Framework Compatibility Callbacks

Plotting callbacks compatible with XGBoost and LightGBM workflows for easy migration.

```python { .api }
def XGBPlottingCallback(period=1, show_stdv=False, figsize=(10, 6)):
    """
    Create XGBoost-style plotting callback for CatBoost training.

    Provides compatibility with XGBoost plotting workflows when migrating
    to CatBoost, maintaining similar API and visualization style.

    Parameters:
    - period: Plotting update period in iterations (int)
    - show_stdv: Show standard deviation bands for CV (bool)
    - figsize: Figure size for matplotlib plots (tuple)

    Returns:
    Callback function for use with CatBoost training

    Usage:
    model.fit(X, y, callbacks=[XGBPlottingCallback(period=10)])
    """

def lgbm_plotting_callback(period=1, show_stdv=False, figsize=(10, 6)):
    """
    Create LightGBM-style plotting callback for CatBoost training.

    Provides compatibility with LightGBM plotting workflows when migrating
    to CatBoost, maintaining similar API and visualization patterns.

    Parameters:
    - period: Plotting update period in iterations (int)
    - show_stdv: Show standard deviation bands for CV (bool)
    - figsize: Figure size for matplotlib plots (tuple)

    Returns:
    Callback function for use with CatBoost training

    Usage:
    model.fit(X, y, callbacks=[lgbm_plotting_callback(period=5)])
    """
```

### Built-in Model Plotting Methods

Direct plotting methods available on trained CatBoost models.

```python { .api }
# These methods are available on trained CatBoost model objects
def plot_tree(self, tree_idx=0, pool=None, figsize=(20, 15),
              save_path=None, title=None):
    """
    Visualize individual decision tree from the ensemble.

    Parameters:
    - tree_idx: Index of tree to visualize (int)
    - pool: Pool for leaf value calculation (Pool, optional)
    - figsize: Figure size for visualization (tuple)
    - save_path: Path to save tree visualization (string)
    - title: Plot title (string)

    Returns:
    Tree visualization plot
    """

def plot_predictions(self, data, target=None, figsize=(10, 6),
                     title="Predictions vs Actual", save_path=None):
    """
    Plot model predictions against actual values.

    Parameters:
    - data: Input data for predictions (Pool or array-like)
    - target: True target values (array-like, optional)
    - figsize: Figure size for matplotlib (tuple)
    - title: Plot title (string)
    - save_path: Path to save plot (string)

    Returns:
    matplotlib.figure.Figure: Predictions scatter plot
    """
```
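
A minimal sketch of how these methods might be called, following the signatures documented above; it assumes `model` is an already-fitted CatBoost model and `X_val`, `y_val` are held-out data from an earlier step.

```python
# Assumes `model`, `X_val`, and `y_val` exist from a previous training step.
# Signatures follow the API reference above.
tree_fig = model.plot_tree(
    tree_idx=0,
    title='First tree in the ensemble',
    save_path='tree_0.png'
)

pred_fig = model.plot_predictions(
    data=X_val,
    target=y_val,
    title='Predictions vs Actual (validation set)',
    save_path='predictions_vs_actual.png'
)
```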

## Visualization Examples

### Basic Training Visualization

```python
from catboost import CatBoostClassifier
from catboost.widget import MetricVisualizer
from sklearn.model_selection import train_test_split
import pandas as pd

# Prepare data
df = pd.read_csv('train.csv')
X = df.drop('target', axis=1)
y = df['target']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize visualizer (in Jupyter notebook)
visualizer = MetricVisualizer()

# Train model with visualization
model = CatBoostClassifier(
    iterations=500,
    learning_rate=0.1,
    depth=6,
    eval_metric='AUC',
    train_dir='./catboost_training',  # Required for visualization
    verbose=True
)

# Start visualization widget
visualizer.start(train_dirs=['./catboost_training'])

# Fit model (metrics will be visualized in real-time)
model.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    plot=True  # Enable built-in plotting
)

# Stop visualization when done
visualizer.stop()
```

### Advanced Metrics Plotting

```python
from catboost.widget import MetricsPlotter
from catboost import CatBoostRegressor
import matplotlib.pyplot as plt

# Initialize plotter
plotter = MetricsPlotter()

# Train model with comprehensive logging
# (eval_metric takes a single metric; additional metrics go in custom_metric)
model = CatBoostRegressor(
    iterations=1000,
    learning_rate=0.05,
    depth=8,
    eval_metric='RMSE',
    custom_metric=['MAE', 'R2'],
    train_dir='./detailed_training',
    metric_period=10,
    verbose=100
)

model.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    early_stopping_rounds=50,
    use_best_model=True
)

# Plot multiple metrics
fig = plotter.plot_metrics(
    metrics=['RMSE', 'MAE', 'R2'],
    train_dir='./detailed_training',
    figsize=(15, 10),
    title='CatBoost Training Metrics',
    save_path='training_metrics.png'
)

plt.show()

# Plot feature importance
importance_fig = plotter.plot_feature_importance(
    model=model,
    feature_names=X_train.columns.tolist(),
    max_features=25,
    title='Top 25 Most Important Features'
)

plt.show()
```

### Cross-Validation Visualization

```python
from catboost import cv, Pool
import numpy as np
import matplotlib.pyplot as plt

# Create pool for CV
cv_pool = Pool(X_train, y_train, cat_features=['category'])

# Perform cross-validation with detailed logging
cv_results = cv(
    pool=cv_pool,
    params={
        'iterations': 500,
        'learning_rate': 0.1,
        'depth': 6,
        'loss_function': 'RMSE',
        'eval_metric': 'RMSE',
        'train_dir': './cv_training'
    },
    fold_count=5,
    shuffle=True,
    partition_random_seed=42,
    plot=True,
    verbose=50
)

# Extract scores for custom plotting
train_scores = cv_results['train-RMSE-mean'].values
val_scores = cv_results['test-RMSE-mean'].values
train_std = cv_results['train-RMSE-std'].values
val_std = cv_results['test-RMSE-std'].values

# Create custom learning curve with confidence intervals
fig, ax = plt.subplots(figsize=(12, 8))

iterations = np.arange(1, len(train_scores) + 1)

# Plot mean scores
ax.plot(iterations, train_scores, 'b-', label='Training RMSE', linewidth=2)
ax.plot(iterations, val_scores, 'r-', label='Validation RMSE', linewidth=2)

# Add confidence intervals
ax.fill_between(iterations, train_scores - train_std, train_scores + train_std,
                alpha=0.2, color='blue')
ax.fill_between(iterations, val_scores - val_std, val_scores + val_std,
                alpha=0.2, color='red')

ax.set_xlabel('Iteration')
ax.set_ylabel('RMSE')
ax.set_title('5-Fold Cross-Validation Learning Curves')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('cv_learning_curves.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"Best CV score: {val_scores.min():.4f} ± {val_std[val_scores.argmin()]:.4f}")
```

### Framework Compatibility Examples

```python
from catboost import CatBoostClassifier
from catboost.widget import XGBPlottingCallback, lgbm_plotting_callback

# XGBoost-style plotting
xgb_callback = XGBPlottingCallback(period=25, show_stdv=True, figsize=(12, 8))

model_xgb_style = CatBoostClassifier(
    iterations=300,
    learning_rate=0.1,
    depth=6,
    verbose=False
)

model_xgb_style.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    callbacks=[xgb_callback]
)

# LightGBM-style plotting
lgbm_callback = lgbm_plotting_callback(period=20, figsize=(10, 6))

model_lgbm_style = CatBoostClassifier(
    iterations=300,
    learning_rate=0.1,
    depth=6,
    verbose=False
)

model_lgbm_style.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    callbacks=[lgbm_callback]
)
```

### Interactive Feature Analysis Visualization

```python
from catboost import CatBoostClassifier, EFstrType, Pool
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd

# Train model
model = CatBoostClassifier(iterations=200, verbose=False)
model.fit(X_train, y_train)

# Get SHAP values for visualization (first 100 samples)
shap_values = model.get_feature_importance(
    data=Pool(X_test[:100]),
    type=EFstrType.ShapValues
)

# Create SHAP summary plot; the last column returned by CatBoost is the
# expected-value (bias) term, so drop it before building the DataFrame
plt.figure(figsize=(12, 8))
shap_df = pd.DataFrame(shap_values[:, :-1], columns=X_train.columns)

# Plot mean absolute SHAP values
mean_shap = shap_df.abs().mean().sort_values(ascending=True)
plt.barh(range(len(mean_shap)), mean_shap.values)
plt.yticks(range(len(mean_shap)), mean_shap.index)
plt.xlabel('Mean |SHAP Value|')
plt.title('Feature Importance (SHAP Values)')
plt.tight_layout()
plt.show()

# Feature interaction heatmap; interaction importance is returned as rows of
# (first feature index, second feature index, score), so rebuild a symmetric matrix
interactions = model.get_feature_importance(type=EFstrType.Interaction)
n_features = X_train.shape[1]
interaction_matrix = np.zeros((n_features, n_features))
for first_idx, second_idx, score in interactions:
    interaction_matrix[int(first_idx), int(second_idx)] = score
    interaction_matrix[int(second_idx), int(first_idx)] = score

plt.figure(figsize=(12, 10))
sns.heatmap(
    interaction_matrix,
    xticklabels=X_train.columns,
    yticklabels=X_train.columns,
    annot=False,
    cmap='RdBu_r',
    center=0
)
plt.title('Feature Interaction Matrix')
plt.tight_layout()
plt.show()
```

### Custom Visualization Dashboard

```python
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import numpy as np

def create_training_dashboard(model, X_test, y_test, cv_results=None):
    """Create comprehensive training dashboard."""

    fig = plt.figure(figsize=(20, 15))
    gs = GridSpec(3, 3, figure=fig)

    # 1. Learning curves
    ax1 = fig.add_subplot(gs[0, :2])
    if cv_results is not None:
        iterations = range(1, len(cv_results) + 1)
        ax1.plot(iterations, cv_results['train-RMSE-mean'], 'b-', label='Train')
        ax1.plot(iterations, cv_results['test-RMSE-mean'], 'r-', label='Validation')
        ax1.fill_between(iterations,
                         cv_results['train-RMSE-mean'] - cv_results['train-RMSE-std'],
                         cv_results['train-RMSE-mean'] + cv_results['train-RMSE-std'],
                         alpha=0.2, color='blue')
        ax1.fill_between(iterations,
                         cv_results['test-RMSE-mean'] - cv_results['test-RMSE-std'],
                         cv_results['test-RMSE-mean'] + cv_results['test-RMSE-std'],
                         alpha=0.2, color='red')
        ax1.set_title('Learning Curves')
        ax1.set_xlabel('Iteration')
        ax1.set_ylabel('RMSE')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

    # 2. Feature importance
    ax2 = fig.add_subplot(gs[0, 2])
    importance = model.get_feature_importance()
    top_features = np.argsort(importance)[-10:]
    ax2.barh(range(len(top_features)), importance[top_features])
    ax2.set_yticks(range(len(top_features)))
    ax2.set_yticklabels([f'Feature_{i}' for i in top_features])
    ax2.set_title('Top 10 Features')
    ax2.set_xlabel('Importance')

    # 3. Predictions vs Actual
    ax3 = fig.add_subplot(gs[1, 0])
    predictions = model.predict(X_test)
    ax3.scatter(y_test, predictions, alpha=0.6)
    min_val = min(y_test.min(), predictions.min())
    max_val = max(y_test.max(), predictions.max())
    ax3.plot([min_val, max_val], [min_val, max_val], 'r--', lw=2)
    ax3.set_xlabel('Actual')
    ax3.set_ylabel('Predicted')
    ax3.set_title('Predictions vs Actual')

    # 4. Residuals
    ax4 = fig.add_subplot(gs[1, 1])
    residuals = y_test - predictions
    ax4.scatter(predictions, residuals, alpha=0.6)
    ax4.axhline(y=0, color='r', linestyle='--')
    ax4.set_xlabel('Predicted')
    ax4.set_ylabel('Residuals')
    ax4.set_title('Residual Plot')

    # 5. Residual distribution
    ax5 = fig.add_subplot(gs[1, 2])
    ax5.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
    ax5.set_xlabel('Residuals')
    ax5.set_ylabel('Frequency')
    ax5.set_title('Residual Distribution')

    # 6. Model metrics summary
    ax6 = fig.add_subplot(gs[2, :])
    ax6.axis('off')

    # Calculate metrics
    from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
    rmse = np.sqrt(mean_squared_error(y_test, predictions))
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)

    metrics_text = f"""
    Model Performance Metrics:

    RMSE: {rmse:.4f}
    MAE: {mae:.4f}
    R²: {r2:.4f}

    Model Info:
    Trees: {model.tree_count_}
    Features: {model.feature_count_}
    """

    ax6.text(0.1, 0.5, metrics_text, fontsize=12, verticalalignment='center',
             bbox=dict(boxstyle="round,pad=0.3", facecolor="lightgray"))

    plt.tight_layout()
    plt.savefig('training_dashboard.png', dpi=300, bbox_inches='tight')
    plt.show()

# Usage
create_training_dashboard(model, X_test, y_test, cv_results)
```