or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

api-types.md configuration.md core-data-structures.md data-io.md data-manipulation.md data-types.md errors.md index.md plotting.md statistics-math.md time-series.md

plotting.mddocs/

0

# Data Visualization and Plotting

1

2

Comprehensive plotting and visualization capabilities using the matplotlib backend, including statistical plots, multivariate visualizations, and DataFrame/Series plotting methods for creating publication-ready charts and graphs.

3

4

## Core Imports

5

6

```python

7

import pandas as pd

8

from pandas import plotting

9

from pandas.plotting import (

10

scatter_matrix, radviz, andrews_curves, parallel_coordinates,

11

bootstrap_plot, lag_plot, autocorrelation_plot, table

12

)

13

```

14

15

## Capabilities

16

17

### DataFrame and Series Plotting Methods

18

19

Primary plotting interface available through `.plot` accessor on DataFrame and Series objects.

20

21

```python { .api }

22

class PlotAccessor:

23

"""

24

Make plots of Series or DataFrame using matplotlib backend.

25

26

Parameters:

27

- data: Series or DataFrame, the object for which the method is called

28

- x: label or position, only used if data is a DataFrame

29

- y: label, position or list of positions, allows plotting of one column versus another

30

- kind: str, the kind of plot to produce

31

- ax: matplotlib axes object, axes of the current figure

32

- subplots: bool or sequence, whether to group columns into subplots

33

- sharex: bool, share x axis in case subplots=True

34

- sharey: bool, share y axis in case subplots=True

35

- layout: tuple (rows, cols), layout of subplots

36

- figsize: tuple (width, height), figure size in inches

37

- use_index: bool, use index as ticks for x axis

38

- title: str or list, title to use for the plot

39

- grid: bool, axis grid lines

40

- legend: bool or {'reverse'}, place legend on axis subplots

41

- style: list or dict, matplotlib line style per column

42

- logx: bool, use log scaling on x axis

43

- logy: bool, use log scaling on y axis

44

- loglog: bool, use log scaling on both x and y axes

45

- xticks: sequence, values to use for the xticks

46

- yticks: sequence, values to use for the yticks

47

- xlim: 2-tuple/list, set the x limits of the current axes

48

- ylim: 2-tuple/list, set the y limits of the current axes

49

- rot: int, rotation for ticks

50

- fontsize: int, font size for xticks and yticks

51

- colormap: str or matplotlib colormap, colormap to select colors from

52

- colorbar: bool, if True, plot colorbar (only relevant for scatter and hexbin plots)

53

- position: float, specify relative alignments for bar plot layout

54

- table: bool, Series or DataFrame; if True, draw a table using the data being plotted; if a Series or DataFrame is passed, draw the table from that data instead

55

- yerr: DataFrame, Series, array-like, dict, or str, error bars for the y axis (vertical error bars)

56

- xerr: DataFrame, Series, array-like, dict, or str, error bars for the x axis (horizontal error bars)

57

- stacked: bool, if True, create a stacked plot (defaults to False for line and bar plots, True for area plots)

58

- sort_columns: bool, sort column names to determine plot ordering

59

- secondary_y: bool or sequence, whether to plot on secondary y-axis

60

- mark_right: bool, when using a secondary_y axis, automatically mark the column labels with "(right)" in the legend

61

- include_bool: bool, if True, boolean values can be plotted

62

- backend: str, backend to use instead of the backend specified in the option `plotting.backend`

63

"""

64

65

def __call__(self, x=None, y=None, kind='line', ax=None, subplots=False, sharex=None, sharey=False, layout=None, figsize=None, use_index=True, title=None, grid=None, legend=True, style=None, logx=False, logy=False, loglog=False, xticks=None, yticks=None, xlim=None, ylim=None, rot=None, fontsize=None, colormap=None, colorbar=None, position=0.5, table=False, yerr=None, xerr=None, stacked=False, sort_columns=False, secondary_y=False, mark_right=True, include_bool=False, backend=None, **kwargs):

66

"""Create a plot with various visualization types."""

67

68

def line(self, x=None, y=None, **kwargs):

69

"""Plot Series or DataFrame as lines."""

70

71

def bar(self, x=None, y=None, **kwargs):

72

"""Make a vertical bar plot."""

73

74

def barh(self, x=None, y=None, **kwargs):

75

"""Make a horizontal bar plot."""

76

77

def box(self, by=None, **kwargs):

78

"""Make a box plot of the DataFrame columns."""

79

80

def hist(self, by=None, bins=10, **kwargs):

81

"""Draw one histogram of the DataFrame's columns."""

82

83

def kde(self, bw_method=None, ind=None, **kwargs):

84

"""Generate Kernel Density Estimate plot using Gaussian kernels."""

85

86

def density(self, bw_method=None, ind=None, **kwargs):

87

"""Generate Kernel Density Estimate plot using Gaussian kernels (alias for kde)."""

88

89

def area(self, x=None, y=None, stacked=True, **kwargs):

90

"""Draw a stacked area plot."""

91

92

def pie(self, y=None, **kwargs):

93

"""Generate a pie plot."""

94

95

def scatter(self, x, y, s=None, c=None, **kwargs):

96

"""Create a scatter plot with varying marker point size and color."""

97

98

def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs):

99

"""Generate a hexagonal binning plot."""

100

```

101

102

### Statistical Distribution Plots

103

104

Functions for creating histograms and box plots from DataFrame and Series data.

105

106

```python { .api }

107

def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, figsize=None, bins=10, backend=None, legend=False, **kwargs):

108

"""

109

Draw histogram of the input series using matplotlib.

110

111

Parameters:

112

- by: object, if passed, used to form histograms for separate groups

113

- ax: matplotlib axis object, if not passed, uses gca()

114

- grid: bool, whether to show axis grid lines

115

- xlabelsize: int, if specified changes the x-axis label size

116

- xrot: float, rotation of x axis labels

117

- ylabelsize: int, if specified changes the y-axis label size

118

- yrot: float, rotation of y axis labels

119

- figsize: tuple (width, height), figure size in inches

120

- bins: int or sequence, number of histogram bins to be used

121

- backend: str, backend to use instead of the backend specified in the option `plotting.backend`

122

- legend: bool, whether to show the legend

123

124

Returns:

125

matplotlib.axes.Axes or numpy.ndarray of them

126

"""

127

128

def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, sharey=False, figsize=None, layout=None, bins=10, backend=None, legend=False, **kwargs):

129

"""

130

Make a histogram of the DataFrame's columns.

131

132

Parameters:

133

- data: DataFrame, the pandas object holding the data

134

- column: str or sequence, if passed, will be used to limit data to a subset of columns

135

- by: object, if passed, then used to form histograms for separate groups

136

- grid: bool, whether to show axis grid lines

137

- xlabelsize: int, if specified changes the x-axis label size

138

- xrot: float, rotation of x axis labels

139

- ylabelsize: int, if specified changes the y-axis label size

140

- yrot: float, rotation of y axis labels

141

- ax: matplotlib axes object, if not passed, uses gca()

142

- sharex: bool, in case subplots=True, share x axis and set some x axis labels to invisible

143

- sharey: bool, in case subplots=True, share y axis and set some y axis labels to invisible

144

- figsize: tuple (width, height), figure size in inches

145

- layout: tuple, (rows, columns) for the layout of the histograms

146

- bins: int or sequence, number of histogram bins to be used

147

- backend: str, backend to use for plotting

148

- legend: bool, whether to show the legend

149

150

Returns:

151

matplotlib.axes.Axes or numpy.ndarray of them

152

"""

153

154

def boxplot(data, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, **kwargs):

155

"""

156

Make a box plot from DataFrame columns.

157

158

Parameters:

159

- data: DataFrame, the pandas object holding the data

160

- column: str or list of str, column name or list of names, or vector

161

- by: str or list of str, column in the DataFrame to group by

162

- ax: matplotlib axes object, axes object to draw the plot onto

163

- fontsize: float or str, tick label font size in points or as a string

164

- rot: int, rotation angle of labels (in degrees)

165

- grid: bool, setting this to True will show the grid

166

- figsize: tuple, a tuple (width, height) in inches

167

- layout: tuple, (rows, columns) for the layout of the plot

168

- return_type: str, the kind of object to return

169

170

Returns:

171

result : varies based on return_type parameter

172

"""

173

174

def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, backend=None, **kwargs):

175

"""

176

Make a box plot of the DataFrame columns.

177

178

Parameters:

179

- column: str or list of str, column name or sequence

180

- by: str or array-like, column in the DataFrame to group by

181

- ax: matplotlib axes object, axes object to draw the plot onto

182

- fontsize: float or str, tick label font size

183

- rot: int, rotation angle of labels

184

- grid: bool, setting this to True will show the grid

185

- figsize: tuple, figure size in inches

186

- layout: tuple, (rows, columns) for the layout of the plot

187

- return_type: str, the kind of object to return

188

- backend: str, backend to use for plotting

189

190

Returns:

191

result : varies based on return_type parameter

192

"""

193

194

def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, rot=0, grid=True, ax=None, figsize=None, layout=None, sharex=False, sharey=True, backend=None, **kwargs):

195

"""

196

Make box plots from DataFrameGroupBy data.

197

198

Parameters:

199

- grouped: Grouped DataFrame

200

- subplots: bool, False - no subplots will be used, True - create a subplot for each group

201

- column: column name or list of names, or vector

202

- fontsize: float or str, tick label font size

203

- rot: int, label rotation angle

204

- grid: bool, setting this to True will show the grid

205

- ax: matplotlib axis object

206

- figsize: tuple, figure size in inches

207

- layout: tuple, (rows, columns) for the layout of subplots

208

- sharex: bool, whether to share the x axis between subplots

209

- sharey: bool, whether to share the y axis between subplots

210

- backend: str, backend to use for plotting

211

212

Returns:

213

matplotlib.axes.Axes or numpy.ndarray of them

214

"""

215

```

216

217

### Multivariate Analysis Plots

218

219

Advanced plotting functions for exploring relationships between multiple variables.

220

221

```python { .api }

222

def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, diagonal='hist', marker='.', density_kwds=None, hist_kwds=None, range_padding=0.05, **kwargs):

223

"""

224

Draw a matrix of scatter plots.

225

226

Parameters:

227

- frame: DataFrame, data for scatter matrix

228

- alpha: float, amount of transparency applied

229

- figsize: tuple (width, height), figure size in inches

230

- ax: matplotlib axis object

231

- grid: bool, setting this to True will show the grid

232

- diagonal: str {'hist', 'kde'}, pick between 'kde' and 'hist' for diagonal plots

233

- marker: str, matplotlib marker type

234

- density_kwds: dict, keyword arguments to be passed to kernel density estimate plot

235

- hist_kwds: dict, keyword arguments to be passed to hist function

236

- range_padding: float, relative extension of axis range

237

238

Returns:

239

numpy.ndarray: A matrix of scatter plots

240

"""

241

242

def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):

243

"""

244

Plot a multidimensional dataset in 2D.

245

246

Each Series in the DataFrame is represented as an evenly distributed

247

slice on a circle. RadViz allows projection of N-dimensional data set into 2D space.

248

249

Parameters:

250

- frame: DataFrame, object holding the data

251

- class_column: str, column name containing the name of the data point category

252

- ax: matplotlib.axes.Axes, a plot instance to which to add the information

253

- color: list or tuple of str, assign a color to each category

254

- colormap: str or matplotlib.colors.Colormap, colormap to select colors from

255

256

Returns:

257

matplotlib.axes.Axes

258

"""

259

260

def andrews_curves(frame, class_column, ax=None, samples=200, color=None, colormap=None, **kwargs):

261

"""

262

Generate a matplotlib plot for visualizing clusters of multivariate data.

263

264

Andrews curves have the functional form:

265

f(t) = x_1/sqrt(2) + x_2*sin(t) + x_3*cos(t) + x_4*sin(2t) + x_5*cos(2t) + ...

266

267

Parameters:

268

- frame: DataFrame, data to be plotted, preferably normalized to (0.0, 1.0)

269

- class_column: str, name of the column containing class names

270

- ax: matplotlib axes object, axes to use

271

- samples: int, number of points to plot in each curve

272

- color: list or tuple of str, colors to use for the different classes

273

- colormap: str or matplotlib colormap object, colormap to select colors from

274

275

Returns:

276

matplotlib.axes.Axes

277

"""

278

279

def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, use_columns=False, xticks=None, colormap=None, axvlines=True, axvlines_kwds=None, sort_labels=False, **kwargs):

280

"""

281

Parallel coordinates plotting.

282

283

Parameters:

284

- frame: DataFrame, data for parallel coordinates plot

285

- class_column: str, column name containing class names

286

- cols: list, column names to use

287

- ax: matplotlib axis object

288

- color: list or tuple, colors to use for the different classes

289

- use_columns: bool, if true, columns will be used as xticks

290

- xticks: list or tuple, values to use for xticks

291

- colormap: str or matplotlib colormap, colormap to use for line colors

292

- axvlines: bool, if true, vertical lines will be added at each xtick

293

- axvlines_kwds: dict, options to be passed to axvline method for vertical lines

294

- sort_labels: bool, sort class_column labels

295

296

Returns:

297

matplotlib.axes.Axes

298

"""

299

```

300

301

### Time Series Visualization

302

303

Specialized plots for analyzing temporal patterns and relationships in time series data.

304

305

```python { .api }

306

def lag_plot(series, lag=1, ax=None, **kwds):

307

"""

308

Lag plot for time series.

309

310

Parameters:

311

- series: Series, the time series to visualize

312

- lag: int, lag length of the scatter plot

313

- ax: matplotlib axis object, the matplotlib axis object to use

314

315

Returns:

316

matplotlib.axes.Axes

317

"""

318

319

def autocorrelation_plot(series, ax=None, **kwargs):

320

"""

321

Autocorrelation plot for time series.

322

323

The horizontal lines in the plot correspond to 95% and 99% confidence bands.

324

The dashed line is the 99% confidence band.

325

326

Parameters:

327

- series: Series, the time series to visualize

328

- ax: matplotlib axis object, the matplotlib axis object to use

329

330

Returns:

331

matplotlib.axes.Axes

332

"""

333

```

334

335

### Statistical Bootstrap Analysis

336

337

Bootstrap resampling visualization for uncertainty estimation.

338

339

```python { .api }

340

def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):

341

"""

342

Bootstrap plot on mean, median and mid-range statistics.

343

344

The bootstrap plot is used to estimate the uncertainty of a statistic

345

by relying on random sampling with replacement.

346

347

Parameters:

348

- series: Series, series from where to get the samplings for the bootstrapping

349

- fig: matplotlib.figure.Figure, if given, it will use the fig reference for plotting

350

- size: int, number of data points to consider during each sampling

351

- samples: int, number of times the bootstrap procedure is performed

352

353

Returns:

354

matplotlib.figure.Figure

355

"""

356

```

357

358

### Table Display and Matplotlib Integration

359

360

Display utilities and matplotlib converter management.

361

362

```python { .api }

363

def table(ax, data, **kwargs):

364

"""

365

Helper function to convert DataFrame and Series to matplotlib.table.

366

367

Parameters:

368

- ax: matplotlib axes object, axes to draw table on

369

- data: DataFrame or Series, data for table contents

370

- **kwargs: keyword arguments passed to matplotlib.table.table

371

372

Returns:

373

matplotlib.table.Table: matplotlib table object

374

"""

375

376

def register_matplotlib_converters():

377

"""

378

Register pandas formatters and converters with matplotlib.

379

380

This function modifies the global matplotlib.units.registry dictionary.

381

Pandas adds custom converters for pd.Timestamp, pd.Period, np.datetime64,

382

datetime.datetime, datetime.date, and datetime.time.

383

"""

384

385

def deregister_matplotlib_converters():

386

"""

387

Remove pandas formatters and converters.

388

389

Removes the custom converters added by register_matplotlib_converters.

390

This attempts to set the state of the registry back to the state before

391

pandas registered its own units.

392

"""

393

```

394

395

### Plot Configuration and Styling

396

397

Configuration options and styling utilities for customizing plot appearance.

398

399

```python { .api }

400

class _Options(dict):

401

"""

402

Stores pandas plotting options.

403

404

Allows for parameter aliasing so you can use parameter names that are

405

the same as the plot function parameters, stored in canonical format.

406

"""

407

408

def use(self, key, value):

409

"""

410

Temporarily set a parameter value using the with statement.

411

412

Parameters:

413

- key: str, parameter name (aliasing allowed)

414

- value: any, parameter value to set temporarily

415

416

Returns:

417

context manager for temporary parameter setting

418

"""

419

420

def reset(self):

421

"""Reset the option store to its initial state."""

422

423

# Global plot parameters object

424

plot_params = _Options()

425

```

426

427

## Types

428

429

```python { .api }

430

# Plot kind constants

431

class PlotKind:

432

LINE = 'line'

433

BAR = 'bar'

434

BARH = 'barh'

435

HIST = 'hist'

436

BOX = 'box'

437

KDE = 'kde'

438

DENSITY = 'density'

439

AREA = 'area'

440

PIE = 'pie'

441

SCATTER = 'scatter'

442

HEXBIN = 'hexbin'

443

444

# Diagonal plot options for scatter_matrix

445

class DiagonalKind:

446

HIST = 'hist'

447

KDE = 'kde'

448

449

# Plot accessor class

450

class PlotAccessor:

451

"""Plotting accessor for Series and DataFrame objects."""

452

def __init__(self, data): ...

453

def __call__(self, *args, **kwargs): ...

454

455

# Individual plot methods

456

line: Callable

457

bar: Callable

458

barh: Callable

459

box: Callable

460

hist: Callable

461

kde: Callable

462

density: Callable

463

area: Callable

464

pie: Callable

465

scatter: Callable # DataFrame only

466

hexbin: Callable # DataFrame only

467

468

# Matplotlib integration types

469

from matplotlib.axes import Axes

470

from matplotlib.figure import Figure

471

from matplotlib.table import Table

472

from matplotlib.colors import Colormap

473

import numpy as np

474

475

# Return types for plotting functions

476

PlotResult = Axes | np.ndarray | Figure | Table

477

```