0
# Data Visualization and Plotting
1
2
Comprehensive plotting and visualization capabilities using the matplotlib backend, including statistical plots, multivariate visualizations, and DataFrame/Series plotting methods for creating publication-ready charts and graphs.
3
4
## Core Imports
5
6
```python
7
import pandas as pd
8
from pandas import plotting
9
from pandas.plotting import (
10
scatter_matrix, radviz, andrews_curves, parallel_coordinates,
11
bootstrap_plot, lag_plot, autocorrelation_plot, table
12
)
13
```
14
15
## Capabilities
16
17
### DataFrame and Series Plotting Methods
18
19
Primary plotting interface available through `.plot` accessor on DataFrame and Series objects.
20
21
```python { .api }
22
class PlotAccessor:
23
"""
24
Make plots of Series or DataFrame using matplotlib backend.
25
26
Parameters:
27
- data: Series or DataFrame, the object for which the method is called
28
- x: label or position, only used if data is a DataFrame
29
- y: label, position or list of positions, allows plotting of one column versus another
30
- kind: str, the kind of plot to produce
31
- ax: matplotlib axes object, axes of the current figure
32
- subplots: bool or sequence, whether to group columns into subplots
33
- sharex: bool, share x axis in case subplots=True
34
- sharey: bool, share y axis in case subplots=True
35
- layout: tuple (rows, cols), layout of subplots
36
- figsize: tuple (width, height), figure size in inches
37
- use_index: bool, use index as ticks for x axis
38
- title: str or list, title to use for the plot
39
- grid: bool, axis grid lines
40
- legend: bool or {'reverse'}, place legend on axis subplots
41
- style: list or dict, matplotlib line style per column
42
- logx: bool, use log scaling on x axis
43
- logy: bool, use log scaling on y axis
44
- loglog: bool, use log scaling on both x and y axes
45
- xticks: sequence, values to use for the xticks
46
- yticks: sequence, values to use for the yticks
47
- xlim: 2-tuple/list, set the x limits of the current axes
48
- ylim: 2-tuple/list, set the y limits of the current axes
49
- rot: int, rotation for ticks
50
- fontsize: int, font size for xticks and yticks
51
- colormap: str or matplotlib colormap, colormap to select colors from
52
- colorbar: bool, if True, plot colorbar (only relevant for scatter and hexbin plots)
53
- position: float, specify relative alignments for bar plot layout
54
- table: bool, Series or DataFrame; if True, draw a table from the plotted data; if a Series or DataFrame is passed, draw the table from that data instead
55
- yerr: DataFrame, Series, array-like, dict, or str, error bar values for the y axis
56
- xerr: DataFrame, Series, array-like, dict, or str, error bar values for the x axis
57
- stacked: bool, in line and bar plots, if True, create stacked plot
58
- sort_columns: bool, sort column names to determine plot ordering
59
- secondary_y: bool or sequence, whether to plot on secondary y-axis
60
- mark_right: bool, when using a secondary_y axis, automatically mark the column labels with "(right)" in the legend
61
- include_bool: bool, if True, boolean values can be plotted
62
- backend: str, backend to use instead of the backend specified in the option
63
"""
64
65
def __call__(self, x=None, y=None, kind='line', ax=None, subplots=False, sharex=None, sharey=False, layout=None, figsize=None, use_index=True, title=None, grid=None, legend=True, style=None, logx=False, logy=False, loglog=False, xticks=None, yticks=None, xlim=None, ylim=None, rot=None, fontsize=None, colormap=None, colorbar=None, position=0.5, table=False, yerr=None, xerr=None, stacked=False, sort_columns=False, secondary_y=False, mark_right=True, include_bool=False, backend=None, **kwargs):
66
"""Create a plot with various visualization types."""
67
68
def line(self, x=None, y=None, **kwargs):
69
"""Plot Series or DataFrame as lines."""
70
71
def bar(self, x=None, y=None, **kwargs):
72
"""Make a vertical bar plot."""
73
74
def barh(self, x=None, y=None, **kwargs):
75
"""Make a horizontal bar plot."""
76
77
def box(self, by=None, **kwargs):
78
"""Make a box plot of the DataFrame columns."""
79
80
def hist(self, by=None, bins=10, **kwargs):
81
"""Draw one histogram of the DataFrame's columns."""
82
83
def kde(self, bw_method=None, ind=None, **kwargs):
84
"""Generate Kernel Density Estimate plot using Gaussian kernels."""
85
86
def density(self, bw_method=None, ind=None, **kwargs):
87
"""Generate Kernel Density Estimate plot using Gaussian kernels (alias for kde)."""
88
89
def area(self, x=None, y=None, stacked=True, **kwargs):
90
"""Draw a stacked area plot."""
91
92
def pie(self, y=None, **kwargs):
93
"""Generate a pie plot."""
94
95
def scatter(self, x, y, s=None, c=None, **kwargs):
96
"""Create a scatter plot with varying marker point size and color."""
97
98
def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs):
99
"""Generate a hexagonal binning plot."""
100
```
101
102
### Statistical Distribution Plots
103
104
Functions for creating histograms and box plots from DataFrame and Series data.
105
106
```python { .api }
107
def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, figsize=None, bins=10, backend=None, legend=False, **kwargs):
108
"""
109
Draw histogram of the input series using matplotlib.
110
111
Parameters:
112
- by: object, if passed, used to form histograms for separate groups
113
- ax: matplotlib axis object, if not passed, uses gca()
114
- grid: bool, whether to show axis grid lines
115
- xlabelsize: int, if specified changes the x-axis label size
116
- xrot: float, rotation of x axis labels
117
- ylabelsize: int, if specified changes the y-axis label size
118
- yrot: float, rotation of y axis labels
119
- figsize: tuple (width, height), figure size in inches
120
- bins: int or sequence, number of histogram bins to be used
121
- backend: str, backend to use instead of the backend specified in the option
122
- legend: bool, whether to show the legend
123
124
Returns:
125
matplotlib.axes.Axes or numpy.ndarray of them
126
"""
127
128
def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, sharey=False, figsize=None, layout=None, bins=10, backend=None, legend=False, **kwargs):
129
"""
130
Make a histogram of the DataFrame's columns.
131
132
Parameters:
133
- data: DataFrame, the pandas object holding the data
134
- column: str or sequence, if passed, will be used to limit data to a subset of columns
135
- by: object, if passed, then used to form histograms for separate groups
136
- grid: bool, whether to show axis grid lines
137
- xlabelsize: int, if specified changes the x-axis label size
138
- xrot: float, rotation of x axis labels
139
- ylabelsize: int, if specified changes the y-axis label size
140
- yrot: float, rotation of y axis labels
141
- ax: matplotlib axes object, if not passed, uses gca()
142
- sharex: bool, in case subplots=True, share x axis and set some x axis labels to invisible
143
- sharey: bool, in case subplots=True, share y axis and set some y axis labels to invisible
144
- figsize: tuple (width, height), figure size in inches
145
- layout: tuple, (rows, columns) for the layout of the histograms
146
- bins: int or sequence, number of histogram bins to be used
147
- backend: str, backend to use for plotting
148
- legend: bool, whether to show the legend
149
150
Returns:
151
matplotlib.axes.Axes or numpy.ndarray of them
152
"""
153
154
def boxplot(data, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, **kwargs):
155
"""
156
Make a box plot from DataFrame columns.
157
158
Parameters:
159
- data: DataFrame, the pandas object holding the data
160
- column: str or list of str, column name or list of names, or vector
161
- by: str or list of str, column in the DataFrame to group by
162
- ax: matplotlib axes object, axes object to draw the plot onto
163
- fontsize: float or str, tick label font size in points or as a string
164
- rot: int, rotation angle of labels (in degrees)
165
- grid: bool, setting this to True will show the grid
166
- figsize: tuple, a tuple (width, height) in inches
167
- layout: tuple, (rows, columns) for the layout of the plot
168
- return_type: str, the kind of object to return
169
170
Returns:
171
result : varies based on return_type parameter
172
"""
173
174
def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, backend=None, **kwargs):
175
"""
176
Make a box plot of the DataFrame columns.
177
178
Parameters:
179
- column: str or list of str, column name or sequence
180
- by: str or array-like, column in the DataFrame to group by
181
- ax: matplotlib axes object, axes object to draw the plot onto
182
- fontsize: float or str, tick label font size
183
- rot: int, rotation angle of labels
184
- grid: bool, setting this to True will show the grid
185
- figsize: tuple, figure size in inches
186
- layout: tuple, (rows, columns) for the layout of the plot
187
- return_type: str, the kind of object to return
188
- backend: str, backend to use for plotting
189
190
Returns:
191
result : varies based on return_type parameter
192
"""
193
194
def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, rot=0, grid=True, ax=None, figsize=None, layout=None, sharex=False, sharey=True, backend=None, **kwargs):
195
"""
196
Make box plots from DataFrameGroupBy data.
197
198
Parameters:
199
- grouped: Grouped DataFrame
200
- subplots: bool, False - no subplots will be used, True - create a subplot for each group
201
- column: column name or list of names, or vector
202
- fontsize: float or str, tick label font size
203
- rot: int, label rotation angle
204
- grid: bool, setting this to True will show the grid
205
- ax: matplotlib axis object
206
- figsize: tuple, figure size in inches
207
- layout: tuple, (rows, columns) for the layout of subplots
208
- sharex: bool, whether to share the x axis between subplots
209
- sharey: bool, whether to share the y axis between subplots
210
- backend: str, backend to use for plotting
211
212
Returns:
213
matplotlib.axes.Axes or numpy.ndarray of them
214
"""
215
```
216
217
### Multivariate Analysis Plots
218
219
Advanced plotting functions for exploring relationships between multiple variables.
220
221
```python { .api }
222
def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, diagonal='hist', marker='.', density_kwds=None, hist_kwds=None, range_padding=0.05, **kwargs):
223
"""
224
Draw a matrix of scatter plots.
225
226
Parameters:
227
- frame: DataFrame, data for scatter matrix
228
- alpha: float, amount of transparency applied
229
- figsize: tuple (width, height), figure size in inches
230
- ax: matplotlib axis object
231
- grid: bool, setting this to True will show the grid
232
- diagonal: str {'hist', 'kde'}, pick between 'kde' and 'hist' for diagonal plots
233
- marker: str, matplotlib marker type
234
- density_kwds: dict, keyword arguments to be passed to kernel density estimate plot
235
- hist_kwds: dict, keyword arguments to be passed to hist function
236
- range_padding: float, relative extension of axis range
237
238
Returns:
239
numpy.ndarray: A matrix of scatter plots
240
"""
241
242
def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):
243
"""
244
Plot a multidimensional dataset in 2D.
245
246
Each Series in the DataFrame is represented as an evenly distributed
247
slice on a circle. RadViz allows projection of N-dimensional data set into 2D space.
248
249
Parameters:
250
- frame: DataFrame, object holding the data
251
- class_column: str, column name containing the name of the data point category
252
- ax: matplotlib.axes.Axes, a plot instance to which to add the information
253
- color: list or tuple of str, assign a color to each category
254
- colormap: str or matplotlib.colors.Colormap, colormap to select colors from
255
256
Returns:
257
matplotlib.axes.Axes
258
"""
259
260
def andrews_curves(frame, class_column, ax=None, samples=200, color=None, colormap=None, **kwargs):
261
"""
262
Generate a matplotlib plot for visualizing clusters of multivariate data.
263
264
Andrews curves have the functional form:
265
f(t) = x_1/sqrt(2) + x_2*sin(t) + x_3*cos(t) + x_4*sin(2t) + x_5*cos(2t) + ...
266
267
Parameters:
268
- frame: DataFrame, data to be plotted, preferably normalized to (0.0, 1.0)
269
- class_column: str, name of the column containing class names
270
- ax: matplotlib axes object, axes to use
271
- samples: int, number of points to plot in each curve
272
- color: list or tuple of str, colors to use for the different classes
273
- colormap: str or matplotlib colormap object, colormap to select colors from
274
275
Returns:
276
matplotlib.axes.Axes
277
"""
278
279
def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, use_columns=False, xticks=None, colormap=None, axvlines=True, axvlines_kwds=None, sort_labels=False, **kwargs):
280
"""
281
Parallel coordinates plotting.
282
283
Parameters:
284
- frame: DataFrame, data for parallel coordinates plot
285
- class_column: str, column name containing class names
286
- cols: list, column names to use
287
- ax: matplotlib axis object
288
- color: list or tuple, colors to use for the different classes
289
- use_columns: bool, if true, columns will be used as xticks
290
- xticks: list or tuple, values to use for xticks
291
- colormap: str or matplotlib colormap, colormap to use for line colors
292
- axvlines: bool, if true, vertical lines will be added at each xtick
293
- axvlines_kwds: dict, options to be passed to axvline method for vertical lines
294
- sort_labels: bool, sort class_column labels
295
296
Returns:
297
matplotlib.axes.Axes
298
"""
299
```
300
301
### Time Series Visualization
302
303
Specialized plots for analyzing temporal patterns and relationships in time series data.
304
305
```python { .api }
306
def lag_plot(series, lag=1, ax=None, **kwds):
307
"""
308
Lag plot for time series.
309
310
Parameters:
311
- series: Series, the time series to visualize
312
- lag: int, lag length of the scatter plot
313
- ax: matplotlib axis object, the matplotlib axis object to use
314
315
Returns:
316
matplotlib.axes.Axes
317
"""
318
319
def autocorrelation_plot(series, ax=None, **kwargs):
320
"""
321
Autocorrelation plot for time series.
322
323
The horizontal lines in the plot correspond to 95% and 99% confidence bands.
324
The dashed line is the 99% confidence band.
325
326
Parameters:
327
- series: Series, the time series to visualize
328
- ax: matplotlib axis object, the matplotlib axis object to use
329
330
Returns:
331
matplotlib.axes.Axes
332
"""
333
```
334
335
### Statistical Bootstrap Analysis
336
337
Bootstrap resampling visualization for uncertainty estimation.
338
339
```python { .api }
340
def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):
341
"""
342
Bootstrap plot on mean, median and mid-range statistics.
343
344
The bootstrap plot is used to estimate the uncertainty of a statistic
345
by relying on random sampling with replacement.
346
347
Parameters:
348
- series: Series, series from where to get the samplings for the bootstrapping
349
- fig: matplotlib.figure.Figure, if given, it will use the fig reference for plotting
350
- size: int, number of data points to consider during each sampling
351
- samples: int, number of times the bootstrap procedure is performed
352
353
Returns:
354
matplotlib.figure.Figure
355
"""
356
```
357
358
### Table Display and Matplotlib Integration
359
360
Display utilities and matplotlib converter management.
361
362
```python { .api }
363
def table(ax, data, **kwargs):
364
"""
365
Helper function to convert DataFrame and Series to matplotlib.table.
366
367
Parameters:
368
- ax: matplotlib axes object, axes to draw table on
369
- data: DataFrame or Series, data for table contents
370
- **kwargs: keyword arguments passed to matplotlib.table.table
371
372
Returns:
373
matplotlib.table.Table: matplotlib table object
374
"""
375
376
def register_matplotlib_converters():
377
"""
378
Register pandas formatters and converters with matplotlib.
379
380
This function modifies the global matplotlib.units.registry dictionary.
381
Pandas adds custom converters for pd.Timestamp, pd.Period, np.datetime64,
382
datetime.datetime, datetime.date, and datetime.time.
383
"""
384
385
def deregister_matplotlib_converters():
386
"""
387
Remove pandas formatters and converters.
388
389
Removes the custom converters added by register_matplotlib_converters.
390
This attempts to set the state of the registry back to the state before
391
pandas registered its own units.
392
"""
393
```
394
395
### Plot Configuration and Styling
396
397
Configuration options and styling utilities for customizing plot appearance.
398
399
```python { .api }
400
class _Options(dict):
401
"""
402
Stores pandas plotting options.
403
404
Allows for parameter aliasing so you can use parameter names that are
405
the same as the plot function parameters, stored in canonical format.
406
"""
407
408
def use(self, key, value):
409
"""
410
Temporarily set a parameter value using the with statement.
411
412
Parameters:
413
- key: str, parameter name (aliasing allowed)
414
- value: any, parameter value to set temporarily
415
416
Returns:
417
context manager for temporary parameter setting
418
"""
419
420
def reset(self):
421
"""Reset the option store to its initial state."""
422
423
# Global plot parameters object
424
plot_params = _Options()
425
```
426
427
## Types
428
429
```python { .api }
430
# Plot kind constants
431
class PlotKind:
432
LINE = 'line'
433
BAR = 'bar'
434
BARH = 'barh'
435
HIST = 'hist'
436
BOX = 'box'
437
KDE = 'kde'
438
DENSITY = 'density'
439
AREA = 'area'
440
PIE = 'pie'
441
SCATTER = 'scatter'
442
HEXBIN = 'hexbin'
443
444
# Diagonal plot options for scatter_matrix
445
class DiagonalKind:
446
HIST = 'hist'
447
KDE = 'kde'
448
449
# Plot accessor class
450
class PlotAccessor:
451
"""Plotting accessor for Series and DataFrame objects."""
452
def __init__(self, data): ...
453
def __call__(self, *args, **kwargs): ...
454
455
# Individual plot methods
456
line: Callable
457
bar: Callable
458
barh: Callable
459
box: Callable
460
hist: Callable
461
kde: Callable
462
density: Callable
463
area: Callable
464
pie: Callable
465
scatter: Callable # DataFrame only
466
hexbin: Callable # DataFrame only
467
468
# Matplotlib integration types
469
from matplotlib.axes import Axes
470
from matplotlib.figure import Figure
471
from matplotlib.table import Table
472
from matplotlib.colors import Colormap
473
import numpy as np
474
475
# Return types for plotting functions
476
PlotResult = Axes | np.ndarray | Figure | Table
477
```