
# Visualization

CatBoost provides interactive visualization components specifically designed for Jupyter notebooks, along with compatibility layers for XGBoost and LightGBM plotting workflows. These tools enable real-time monitoring of training progress and model analysis.

## Capabilities

### Interactive Jupyter Widgets

Visualization components that integrate with Jupyter notebook environments.

```python { .api }
class MetricVisualizer:
    """
    Interactive widget for visualizing training metrics in Jupyter notebooks.

    Provides real-time plots of training and validation metrics during model training,
    with interactive controls for zooming, filtering, and metric selection.
    """

    def __init__(self, train_dirs=None, subdirs=None):
        """
        Initialize MetricVisualizer widget.

        Parameters:
        - train_dirs: List of training directories to monitor (list of strings)
        - subdirs: Subdirectories within train_dirs to include (list of strings)
        """

    def start(self, train_dirs=None, subdirs=None):
        """
        Start the metric visualization widget.

        Parameters:
        - train_dirs: Training directories to visualize (list of strings)
        - subdirs: Subdirectories to include (list of strings)

        Returns:
        Interactive Jupyter widget displaying training metrics
        """

    def stop(self):
        """Stop the metric visualization widget."""


class MetricsPlotter:
    """
    Utility class for plotting training metrics with matplotlib integration.

    Provides static and dynamic plotting capabilities for CatBoost training metrics,
    with customizable styling and export options.
    """

    def __init__(self, train_dir=None):
        """
        Initialize MetricsPlotter.

        Parameters:
        - train_dir: Training directory containing metric logs (string)
        """

    def plot_metrics(self, metrics=None, train_dir=None, figsize=(12, 8),
                     title=None, save_path=None):
        """
        Plot training metrics from log files.

        Parameters:
        - metrics: List of metrics to plot (list of strings)
        - train_dir: Directory containing training logs (string)
        - figsize: Figure size for matplotlib (tuple)
        - title: Plot title (string)
        - save_path: Path to save plot image (string)

        Returns:
        matplotlib.figure.Figure: Generated plot figure
        """

    def plot_feature_importance(self, model, feature_names=None,
                                max_features=20, figsize=(10, 8),
                                title="Feature Importance", save_path=None):
        """
        Plot feature importance from trained model.

        Parameters:
        - model: Trained CatBoost model
        - feature_names: Feature names for labeling (list of strings)
        - max_features: Maximum number of features to show (int)
        - figsize: Figure size for matplotlib (tuple)
        - title: Plot title (string)
        - save_path: Path to save plot (string)

        Returns:
        matplotlib.figure.Figure: Feature importance plot
        """

    def plot_learning_curve(self, train_scores, val_scores=None,
                            metric_name="Loss", figsize=(10, 6),
                            title="Learning Curve", save_path=None):
        """
        Plot learning curves for training and validation.

        Parameters:
        - train_scores: Training metric scores (array-like)
        - val_scores: Validation metric scores (array-like, optional)
        - metric_name: Name of the metric being plotted (string)
        - figsize: Figure size for matplotlib (tuple)
        - title: Plot title (string)
        - save_path: Path to save plot (string)

        Returns:
        matplotlib.figure.Figure: Learning curve plot
        """
```

### Framework Compatibility Callbacks

Plotting callbacks compatible with XGBoost and LightGBM workflows for easy migration.

```python { .api }
def XGBPlottingCallback(period=1, show_stdv=False, figsize=(10, 6)):
    """
    Create XGBoost-style plotting callback for CatBoost training.

    Provides compatibility with XGBoost plotting workflows when migrating
    to CatBoost, maintaining similar API and visualization style.

    Parameters:
    - period: Plotting update period in iterations (int)
    - show_stdv: Show standard deviation bands for CV (bool)
    - figsize: Figure size for matplotlib plots (tuple)

    Returns:
    Callback function for use with CatBoost training

    Usage:
    model.fit(X, y, callbacks=[XGBPlottingCallback(period=10)])
    """


def lgbm_plotting_callback(period=1, show_stdv=False, figsize=(10, 6)):
    """
    Create LightGBM-style plotting callback for CatBoost training.

    Provides compatibility with LightGBM plotting workflows when migrating
    to CatBoost, maintaining similar API and visualization patterns.

    Parameters:
    - period: Plotting update period in iterations (int)
    - show_stdv: Show standard deviation bands for CV (bool)
    - figsize: Figure size for matplotlib plots (tuple)

    Returns:
    Callback function for use with CatBoost training

    Usage:
    model.fit(X, y, callbacks=[lgbm_plotting_callback(period=5)])
    """
```

### Built-in Model Plotting Methods

Direct plotting methods available on trained CatBoost models.

```python { .api }
# These methods are available on trained CatBoost model objects
def plot_tree(self, tree_idx=0, pool=None, figsize=(20, 15),
              save_path=None, title=None):
    """
    Visualize an individual decision tree from the ensemble.

    Parameters:
    - tree_idx: Index of tree to visualize (int)
    - pool: Pool for leaf value calculation (Pool, optional)
    - figsize: Figure size for visualization (tuple)
    - save_path: Path to save tree visualization (string)
    - title: Plot title (string)

    Returns:
    Tree visualization plot
    """

def plot_predictions(self, data, target=None, figsize=(10, 6),
                     title="Predictions vs Actual", save_path=None):
    """
    Plot model predictions against actual values.

    Parameters:
    - data: Input data for predictions (Pool or array-like)
    - target: True target values (array-like, optional)
    - figsize: Figure size for matplotlib (tuple)
    - title: Plot title (string)
    - save_path: Path to save plot (string)

    Returns:
    matplotlib.figure.Figure: Predictions scatter plot
    """
```
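
The examples below do not exercise these methods directly, so here is a minimal sketch of how they might be called, assuming the `plot_tree` and `plot_predictions` signatures listed in the API block above and a small synthetic regression dataset:

```python
import numpy as np
from catboost import CatBoostRegressor, Pool

# Small synthetic regression problem so the sketch is self-contained
rng = np.random.default_rng(42)
X = rng.normal(size=(500, 8))
y = 2.0 * X[:, 0] + X[:, 1] ** 2 + rng.normal(scale=0.1, size=500)

model = CatBoostRegressor(iterations=100, depth=4, verbose=False)
model.fit(X, y)

# Visualize the first tree of the ensemble; the Pool supplies data for leaf values
model.plot_tree(tree_idx=0, pool=Pool(X, y))

# Scatter predictions against actual targets (signature as documented above)
model.plot_predictions(X, target=y, title="Predictions vs Actual")
```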

## Visualization Examples

### Basic Training Visualization

```python
from catboost import CatBoostClassifier
from catboost.widget import MetricVisualizer
from sklearn.model_selection import train_test_split
import pandas as pd

# Prepare data and hold out a validation set
df = pd.read_csv('train.csv')
X = df.drop('target', axis=1)
y = df['target']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize visualizer (in Jupyter notebook)
visualizer = MetricVisualizer()

# Train model with visualization
model = CatBoostClassifier(
    iterations=500,
    learning_rate=0.1,
    depth=6,
    eval_metric='AUC',
    train_dir='./catboost_training',  # Required for visualization
    verbose=True
)

# Start visualization widget
visualizer.start(train_dirs=['./catboost_training'])

# Fit model (metrics will be visualized in real-time)
model.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    plot=True  # Enable built-in plotting
)

# Stop visualization when done
visualizer.stop()
```

### Advanced Metrics Plotting

```python
from catboost.widget import MetricsPlotter
from catboost import CatBoostRegressor
import matplotlib.pyplot as plt

# Assumes X_train, y_train, X_val, y_val are already prepared

# Initialize plotter
plotter = MetricsPlotter()

# Train model with comprehensive logging
model = CatBoostRegressor(
    iterations=1000,
    learning_rate=0.05,
    depth=8,
    eval_metric='RMSE',
    custom_metric=['MAE', 'R2'],  # additional metrics logged alongside eval_metric
    train_dir='./detailed_training',
    metric_period=10,
    verbose=100
)

model.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    early_stopping_rounds=50,
    use_best_model=True
)

# Plot multiple metrics
fig = plotter.plot_metrics(
    metrics=['RMSE', 'MAE', 'R2'],
    train_dir='./detailed_training',
    figsize=(15, 10),
    title='CatBoost Training Metrics',
    save_path='training_metrics.png'
)

plt.show()

# Plot feature importance
importance_fig = plotter.plot_feature_importance(
    model=model,
    feature_names=X_train.columns.tolist(),
    max_features=25,
    title='Top 25 Most Important Features'
)

plt.show()
```

### Cross-Validation Visualization

```python
from catboost import cv, Pool
import numpy as np
import matplotlib.pyplot as plt

# Create pool for CV
cv_pool = Pool(X_train, y_train, cat_features=['category'])

# Perform cross-validation with detailed logging
cv_results = cv(
    pool=cv_pool,
    params={
        'iterations': 500,
        'learning_rate': 0.1,
        'depth': 6,
        'loss_function': 'RMSE',
        'eval_metric': 'RMSE',
        'train_dir': './cv_training'
    },
    fold_count=5,
    shuffle=True,
    partition_random_seed=42,
    plot=True,
    verbose=50
)

# Extract scores for custom plotting
train_scores = cv_results['train-RMSE-mean'].values
val_scores = cv_results['test-RMSE-mean'].values
train_std = cv_results['train-RMSE-std'].values
val_std = cv_results['test-RMSE-std'].values

# Create custom learning curve with confidence intervals
fig, ax = plt.subplots(figsize=(12, 8))

iterations = np.arange(1, len(train_scores) + 1)

# Plot mean scores
ax.plot(iterations, train_scores, 'b-', label='Training RMSE', linewidth=2)
ax.plot(iterations, val_scores, 'r-', label='Validation RMSE', linewidth=2)

# Add confidence intervals
ax.fill_between(iterations, train_scores - train_std, train_scores + train_std,
                alpha=0.2, color='blue')
ax.fill_between(iterations, val_scores - val_std, val_scores + val_std,
                alpha=0.2, color='red')

ax.set_xlabel('Iteration')
ax.set_ylabel('RMSE')
ax.set_title('5-Fold Cross-Validation Learning Curves')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('cv_learning_curves.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"Best CV score: {val_scores.min():.4f} ± {val_std[val_scores.argmin()]:.4f}")
```
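
If the custom styling is not needed, the `plot_learning_curve` helper documented in the Capabilities section can draw a comparable curve directly from the extracted score arrays. A minimal sketch, reusing `train_scores` and `val_scores` from the example above and assuming the `MetricsPlotter` API as described:

```python
from catboost.widget import MetricsPlotter
import matplotlib.pyplot as plt

# Reuses the mean scores extracted from cv_results in the example above
plotter = MetricsPlotter()
curve_fig = plotter.plot_learning_curve(
    train_scores=train_scores,
    val_scores=val_scores,
    metric_name="RMSE",
    title="5-Fold CV Learning Curve",
    save_path="cv_learning_curve_simple.png"
)
plt.show()
```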

### Framework Compatibility Examples

```python
from catboost import CatBoostClassifier
from catboost.widget import XGBPlottingCallback, lgbm_plotting_callback

# XGBoost-style plotting
xgb_callback = XGBPlottingCallback(period=25, show_stdv=True, figsize=(12, 8))

model_xgb_style = CatBoostClassifier(
    iterations=300,
    learning_rate=0.1,
    depth=6,
    verbose=False
)

model_xgb_style.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    callbacks=[xgb_callback]
)

# LightGBM-style plotting
lgbm_callback = lgbm_plotting_callback(period=20, figsize=(10, 6))

model_lgbm_style = CatBoostClassifier(
    iterations=300,
    learning_rate=0.1,
    depth=6,
    verbose=False
)

model_lgbm_style.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    callbacks=[lgbm_callback]
)
```

### Interactive Feature Analysis Visualization

```python
from catboost import CatBoostClassifier, Pool, EFstrType
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd

# Train model
model = CatBoostClassifier(iterations=200, verbose=False)
model.fit(X_train, y_train)

# Get SHAP values for visualization (get_feature_importance expects a Pool)
shap_values = model.get_feature_importance(
    data=Pool(X_test[:100]),  # First 100 samples for visualization
    type=EFstrType.ShapValues
)

# Create SHAP summary plot; the last column holds the expected value (bias), so drop it
plt.figure(figsize=(12, 8))
shap_df = pd.DataFrame(shap_values[:, :-1], columns=X_train.columns)

# Plot mean absolute SHAP values
mean_shap = shap_df.abs().mean().sort_values(ascending=True)
plt.barh(range(len(mean_shap)), mean_shap.values)
plt.yticks(range(len(mean_shap)), mean_shap.index)
plt.xlabel('Mean |SHAP Value|')
plt.title('Feature Importance (SHAP Values)')
plt.tight_layout()
plt.show()

# Feature interaction heatmap; EFstrType.Interaction returns
# [first_feature_idx, second_feature_idx, score] triples, so build a symmetric matrix
interactions = model.get_feature_importance(type=EFstrType.Interaction)
interaction_matrix = np.zeros((X_train.shape[1], X_train.shape[1]))
for first_idx, second_idx, score in interactions:
    interaction_matrix[int(first_idx), int(second_idx)] = score
    interaction_matrix[int(second_idx), int(first_idx)] = score

plt.figure(figsize=(12, 10))
sns.heatmap(
    interaction_matrix,
    xticklabels=X_train.columns,
    yticklabels=X_train.columns,
    annot=False,
    cmap='RdBu_r',
    center=0
)
plt.title('Feature Interaction Matrix')
plt.tight_layout()
plt.show()
```

### Custom Visualization Dashboard

```python
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import numpy as np

def create_training_dashboard(model, X_test, y_test, cv_results=None):
    """Create a comprehensive training dashboard."""

    fig = plt.figure(figsize=(20, 15))
    gs = GridSpec(3, 3, figure=fig)

    # 1. Learning curves
    ax1 = fig.add_subplot(gs[0, :2])
    if cv_results is not None:
        iterations = range(1, len(cv_results) + 1)
        ax1.plot(iterations, cv_results['train-RMSE-mean'], 'b-', label='Train')
        ax1.plot(iterations, cv_results['test-RMSE-mean'], 'r-', label='Validation')
        ax1.fill_between(iterations,
                         cv_results['train-RMSE-mean'] - cv_results['train-RMSE-std'],
                         cv_results['train-RMSE-mean'] + cv_results['train-RMSE-std'],
                         alpha=0.2, color='blue')
        ax1.fill_between(iterations,
                         cv_results['test-RMSE-mean'] - cv_results['test-RMSE-std'],
                         cv_results['test-RMSE-mean'] + cv_results['test-RMSE-std'],
                         alpha=0.2, color='red')
        ax1.legend()
    ax1.set_title('Learning Curves')
    ax1.set_xlabel('Iteration')
    ax1.set_ylabel('RMSE')
    ax1.grid(True, alpha=0.3)

    # 2. Feature importance
    ax2 = fig.add_subplot(gs[0, 2])
    importance = model.get_feature_importance()
    top_features = np.argsort(importance)[-10:]
    ax2.barh(range(len(top_features)), importance[top_features])
    ax2.set_yticks(range(len(top_features)))
    ax2.set_yticklabels([f'Feature_{i}' for i in top_features])
    ax2.set_title('Top 10 Features')
    ax2.set_xlabel('Importance')

    # 3. Predictions vs Actual
    ax3 = fig.add_subplot(gs[1, 0])
    predictions = model.predict(X_test)
    ax3.scatter(y_test, predictions, alpha=0.6)
    min_val = min(y_test.min(), predictions.min())
    max_val = max(y_test.max(), predictions.max())
    ax3.plot([min_val, max_val], [min_val, max_val], 'r--', lw=2)
    ax3.set_xlabel('Actual')
    ax3.set_ylabel('Predicted')
    ax3.set_title('Predictions vs Actual')

    # 4. Residuals
    ax4 = fig.add_subplot(gs[1, 1])
    residuals = y_test - predictions
    ax4.scatter(predictions, residuals, alpha=0.6)
    ax4.axhline(y=0, color='r', linestyle='--')
    ax4.set_xlabel('Predicted')
    ax4.set_ylabel('Residuals')
    ax4.set_title('Residual Plot')

    # 5. Residual distribution
    ax5 = fig.add_subplot(gs[1, 2])
    ax5.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
    ax5.set_xlabel('Residuals')
    ax5.set_ylabel('Frequency')
    ax5.set_title('Residual Distribution')

    # 6. Model metrics summary
    ax6 = fig.add_subplot(gs[2, :])
    ax6.axis('off')

    # Calculate metrics
    from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
    rmse = np.sqrt(mean_squared_error(y_test, predictions))
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)

    metrics_text = f"""
    Model Performance Metrics:

    RMSE: {rmse:.4f}
    MAE: {mae:.4f}
    R²: {r2:.4f}

    Model Info:
    Trees: {model.tree_count_}
    Features: {len(model.feature_names_)}
    """

    ax6.text(0.1, 0.5, metrics_text, fontsize=12, verticalalignment='center',
             bbox=dict(boxstyle="round,pad=0.3", facecolor="lightgray"))

    plt.tight_layout()
    plt.savefig('training_dashboard.png', dpi=300, bbox_inches='tight')
    plt.show()

# Usage
create_training_dashboard(model, X_test, y_test, cv_results)
```