# Visualization

XGBoost provides comprehensive visualization tools for model interpretation, including feature importance plots, tree structure visualization, and GraphViz export capabilities. These tools help understand model behavior and decision-making processes.

## Capabilities

### Feature Importance Plotting

Visualize feature importance scores using matplotlib with customizable styling and display options.

```python { .api }
def plot_importance(
    booster,
    ax=None,
    height=0.2,
    xlim=None,
    ylim=None,
    title='Feature importance',
    xlabel='F score',
    ylabel='Features',
    fmap='',
    importance_type='weight',
    max_num_features=None,
    grid=True,
    show_values=True,
    values_format='{v}',
    **kwargs
):
    """
    Plot feature importance.

    Parameters:
    - booster: Booster object or feature importance dict
    - ax: Matplotlib axes object to plot on
    - height: Bar height for horizontal bar chart
    - xlim: X-axis limits as tuple (min, max)
    - ylim: Y-axis limits as tuple (min, max)
    - title: Plot title
    - xlabel: X-axis label
    - ylabel: Y-axis label
    - fmap: Feature map file path for feature names
    - importance_type: Importance type ('weight', 'gain', 'cover', 'total_gain', 'total_cover')
    - max_num_features: Maximum number of features to display
    - grid: Whether to show grid lines
    - show_values: Whether to show importance values on bars
    - values_format: Format string for importance values
    - **kwargs: Additional matplotlib arguments

    Returns:
    matplotlib.axes.Axes: The axes object containing the plot
    """
```

### Tree Structure Visualization

Plot individual decision trees from the ensemble with customizable layout and styling.

```python { .api }
def plot_tree(
    booster,
    fmap='',
    num_trees=0,
    rankdir=None,
    ax=None,
    **kwargs
):
    """
    Plot specified tree.

    Parameters:
    - booster: Booster object to plot
    - fmap: Feature map file path for feature names
    - num_trees: Tree index to plot (0-based)
    - rankdir: Direction of tree layout, passed to GraphViz ('TB', 'LR', 'BT', 'RL')
    - ax: Matplotlib axes object to plot on
    - **kwargs: Additional graphviz or matplotlib arguments

    Returns:
    matplotlib.axes.Axes: The axes object containing the plot

    Note:
    Requires graphviz package for tree rendering.
    """
```

### GraphViz Export

Export model trees to GraphViz DOT format for external visualization and processing.

```python { .api }
def to_graphviz(
    booster,
    fmap='',
    num_trees=0,
    rankdir=None,
    yes_color=None,
    no_color=None,
    condition_node_params=None,
    leaf_node_params=None,
    **kwargs
):
    """
    Convert specified tree to graphviz format.

    Parameters:
    - booster: Booster object to convert
    - fmap: Feature map file path for feature names
    - num_trees: Tree index to convert (0-based)
    - rankdir: Direction of graph layout, passed to GraphViz ('TB', 'LR', 'BT', 'RL')
    - yes_color: Edge color for 'yes' branches (hex color)
    - no_color: Edge color for 'no' branches (hex color)
    - condition_node_params: Dictionary of graphviz node parameters for condition nodes
    - leaf_node_params: Dictionary of graphviz node parameters for leaf nodes
    - **kwargs: Additional graphviz parameters

    Returns:
    graphviz.Source: GraphViz source object that can be rendered

    Note:
    Requires graphviz package. Returned object can be saved or rendered:
    - graph.render('tree', format='png') saves to file
    - graph.view() opens in viewer
    """
```

## Usage Examples

### Feature Importance Visualization

```python
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

# Train model
# NOTE: load_boston was removed from scikit-learn 1.2; use the
# California housing regression dataset instead.
X, y = fetch_california_housing(return_X_y=True)
dtrain = xgb.DMatrix(X, label=y)
params = {'objective': 'reg:squarederror', 'max_depth': 3}
model = xgb.train(params, dtrain, num_boost_round=100)

# Plot feature importance
fig, ax = plt.subplots(figsize=(10, 8))
xgb.plot_importance(
    model,
    ax=ax,
    importance_type='gain',
    max_num_features=10,
    title='Top 10 Feature Importance (Gain)',
    show_values=True
)
plt.tight_layout()
plt.show()

# Custom styling
xgb.plot_importance(
    model,
    height=0.5,
    grid=False,
    color='green',
    title='Feature Importance',
    xlabel='Importance Score',
    ylabel='Feature Names'
)
```

### Tree Visualization

```python
import xgboost as xgb
import matplotlib.pyplot as plt

# Train model
dtrain = xgb.DMatrix(X, label=y)
model = xgb.train(params, dtrain, num_boost_round=5)

# Plot first tree
fig, ax = plt.subplots(figsize=(15, 10))
xgb.plot_tree(
    model,
    num_trees=0,
    ax=ax,
    rankdir='TB'  # Top to bottom layout
)
plt.show()

# Plot multiple trees
fig, axes = plt.subplots(2, 2, figsize=(20, 15))
for i, ax in enumerate(axes.flat):
    if i < 4:  # Plot first 4 trees
        xgb.plot_tree(model, num_trees=i, ax=ax)
        ax.set_title(f'Tree {i}')
plt.tight_layout()
plt.show()
```

### GraphViz Export

```python
import xgboost as xgb

# Train model
model = xgb.train(params, dtrain, num_boost_round=3)

# Export to GraphViz
graph = xgb.to_graphviz(
    model,
    num_trees=0,
    rankdir='LR',  # Left to right layout
    yes_color='#0000FF',  # Blue for yes branches
    no_color='#FF0000',  # Red for no branches
    condition_node_params={'shape': 'box', 'style': 'filled', 'fillcolor': 'lightblue'},
    leaf_node_params={'shape': 'ellipse', 'style': 'filled', 'fillcolor': 'lightgreen'}
)

# Save to file
graph.render('tree_visualization', format='png', cleanup=True)

# View in default viewer
graph.view()

# Get DOT source code
dot_source = graph.source
print(dot_source)
```

### Feature Map Usage

```python
# Create feature map file
feature_names = ['feature_0', 'feature_1', 'feature_2', 'price', 'age']
with open('feature_map.txt', 'w') as f:
    for i, name in enumerate(feature_names):
        f.write(f'{i}\t{name}\tq\n')  # q for quantitative

# Use feature map in visualization
xgb.plot_importance(
    model,
    fmap='feature_map.txt',
    title='Feature Importance with Custom Names'
)

xgb.plot_tree(
    model,
    fmap='feature_map.txt',
    num_trees=0,
    rankdir='TB'
)

graph = xgb.to_graphviz(
    model,
    fmap='feature_map.txt',
    num_trees=0
)
```

### Advanced Importance Analysis

```python
# Get importance scores directly
importance_weight = model.get_score(importance_type='weight')
importance_gain = model.get_score(importance_type='gain')
importance_cover = model.get_score(importance_type='cover')

print("Feature importance by weight:", importance_weight)
print("Feature importance by gain:", importance_gain)
print("Feature importance by cover:", importance_cover)

# Compare different importance types
fig, axes = plt.subplots(1, 3, figsize=(18, 6))

for ax, imp_type in zip(axes, ['weight', 'gain', 'cover']):
    xgb.plot_importance(
        model,
        ax=ax,
        importance_type=imp_type,
        title=f'Feature Importance ({imp_type.title()})',
        max_num_features=10
    )

plt.tight_layout()
plt.show()
```

## Dependencies

The visualization functions require additional packages:

- **matplotlib**: Required for plot_importance and plot_tree
- **graphviz**: Required for to_graphviz and plot_tree
  - Install with: `pip install graphviz`
  - Also requires GraphViz system binaries

## Feature Map Format

Feature map files use tab-separated format:
```
<feature_id>\t<feature_name>\t<feature_type>
```

Where feature_type can be:
- `q`: quantitative (floating point)
- `i`: indicator/binary
- `int`: integer