Powerful data structures for data analysis, time series, and statistics
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Comprehensive plotting and visualization capabilities using matplotlib backend, including statistical plots, multivariate visualizations, and DataFrame/Series plotting methods for creating publication-ready charts and graphs.
import pandas as pd
from pandas import plotting
from pandas.plotting import (
scatter_matrix, radviz, andrews_curves, parallel_coordinates,
bootstrap_plot, lag_plot, autocorrelation_plot, table
)

Primary plotting interface available through the `.plot` accessor on DataFrame and Series objects.
class PlotAccessor:
"""
Make plots of Series or DataFrame using matplotlib backend.
Parameters:
- data: Series or DataFrame, the object for which the method is called
- x: label or position, only used if data is a DataFrame
- y: label, position or list of positions, allows plotting of one column versus another
- kind: str, the kind of plot to produce
- ax: matplotlib axes object, axes of the current figure
- subplots: bool or sequence, whether to group columns into subplots
- sharex: bool, share x axis in case subplots=True
- sharey: bool, share y axis in case subplots=True
- layout: tuple (rows, cols), layout of subplots
- figsize: tuple (width, height), figure size in inches
- use_index: bool, use index as ticks for x axis
- title: str or list, title to use for the plot
- grid: bool, axis grid lines
- legend: bool or {'reverse'}, place legend on axis subplots
- style: list or dict, matplotlib line style per column
- logx: bool, use log scaling on x axis
- logy: bool, use log scaling on y axis
- loglog: bool, use log scaling on both x and y axes
- xticks: sequence, values to use for the xticks
- yticks: sequence, values to use for the yticks
- xlim: 2-tuple/list, set the x limits of the current axes
- ylim: 2-tuple/list, set the y limits of the current axes
- rot: int, rotation for ticks
- fontsize: int, font size for xticks and yticks
- colormap: str or matplotlib colormap, colormap to select colors from
- colorbar: bool, if True, plot colorbar (only relevant for scatter and hexbin plots)
- position: float, specify relative alignments for bar plot layout
- table: bool, Series or DataFrame; if True, draw a table using the data being plotted; if a Series or DataFrame is passed, draw the table from that data instead
- yerr: DataFrame, Series, array-like, dict, or str, error-bar values for the y axis
- xerr: DataFrame, Series, array-like, dict, or str, error-bar values for the x axis (accepts the same forms as yerr)
- stacked: bool, in line and bar plots, if True, create stacked plot
- sort_columns: bool, sort column names to determine plot ordering
- secondary_y: bool or sequence, whether to plot on secondary y-axis
- mark_right: bool, when using a secondary_y axis, automatically mark the column labels with "(right)" in the legend
- include_bool: bool, if True, boolean values can be plotted
- backend: str, backend to use instead of the backend specified in the option
"""
def __call__(self, x=None, y=None, kind='line', ax=None, subplots=False, sharex=None, sharey=False, layout=None, figsize=None, use_index=True, title=None, grid=None, legend=True, style=None, logx=False, logy=False, loglog=False, xticks=None, yticks=None, xlim=None, ylim=None, rot=None, fontsize=None, colormap=None, colorbar=None, position=0.5, table=False, yerr=None, xerr=None, stacked=False, sort_columns=False, secondary_y=False, mark_right=True, include_bool=False, backend=None, **kwargs):
"""Create a plot with various visualization types."""
def line(self, x=None, y=None, **kwargs):
"""Plot Series or DataFrame as lines."""
def bar(self, x=None, y=None, **kwargs):
"""Make a vertical bar plot."""
def barh(self, x=None, y=None, **kwargs):
"""Make a horizontal bar plot."""
def box(self, by=None, **kwargs):
"""Make a box plot of the DataFrame columns."""
def hist(self, by=None, bins=10, **kwargs):
"""Draw one histogram of the DataFrame's columns."""
def kde(self, bw_method=None, ind=None, **kwargs):
"""Generate Kernel Density Estimate plot using Gaussian kernels."""
def density(self, bw_method=None, ind=None, **kwargs):
"""Generate Kernel Density Estimate plot using Gaussian kernels (alias for kde)."""
def area(self, x=None, y=None, stacked=True, **kwargs):
"""Draw a stacked area plot."""
def pie(self, y=None, **kwargs):
"""Generate a pie plot."""
def scatter(self, x, y, s=None, c=None, **kwargs):
"""Create a scatter plot with varying marker point size and color."""
def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs):
"""Generate a hexagonal binning plot."""

Functions for creating histograms and box plots from DataFrame and Series data.
def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, figsize=None, bins=10, backend=None, legend=False, **kwargs):
"""
Draw histogram of the input series using matplotlib.
Parameters:
- by: object, if passed, used to form histograms for separate groups
- ax: matplotlib axis object, if not passed, uses gca()
- grid: bool, whether to show axis grid lines
- xlabelsize: int, if specified changes the x-axis label size
- xrot: float, rotation of x axis labels
- ylabelsize: int, if specified changes the y-axis label size
- yrot: float, rotation of y axis labels
- figsize: tuple, figure size in inches; uses the value in matplotlib.rcParams by default
- bins: int or sequence, number of histogram bins to be used
- backend: str, backend to use instead of the backend specified in the option
- legend: bool, whether to show the legend
Returns:
matplotlib.axes.Axes or numpy.ndarray of them
"""
def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, sharey=False, figsize=None, layout=None, bins=10, backend=None, legend=False, **kwargs):
"""
Make a histogram of the DataFrame's columns.
Parameters:
- data: DataFrame, the pandas object holding the data
- column: str or sequence, if passed, will be used to limit data to a subset of columns
- by: object, if passed, then used to form histograms for separate groups
- grid: bool, whether to show axis grid lines
- xlabelsize: int, if specified changes the x-axis label size
- xrot: float, rotation of x axis labels
- ylabelsize: int, if specified changes the y-axis label size
- yrot: float, rotation of y axis labels
- ax: matplotlib axes object, if not passed, uses gca()
- sharex: bool, in case subplots=True, share x axis and set some x axis labels to invisible
- sharey: bool, in case subplots=True, share y axis and set some y axis labels to invisible
- figsize: tuple, figure size in inches; uses the value in matplotlib.rcParams by default
- layout: tuple, (rows, columns) for the layout of the histograms
- bins: int or sequence, number of histogram bins to be used
- backend: str, backend to use for plotting
- legend: bool, whether to show the legend
Returns:
matplotlib.axes.Axes or numpy.ndarray of them
"""
def boxplot(data, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, **kwargs):
"""
Make a box plot from DataFrame columns.
Parameters:
- data: DataFrame, the pandas object holding the data
- column: str or list of str, column name or list of names, or vector
- by: str or list of str, column in the DataFrame to group by
- ax: matplotlib axes object, axes object to draw the plot onto
- fontsize: float or str, tick label font size in points or as a string
- rot: int, rotation angle of labels (in degrees)
- grid: bool, setting this to True will show the grid
- figsize: tuple, a tuple (width, height) in inches
- layout: tuple, (rows, columns) for the layout of the plot
- return_type: str, the kind of object to return
Returns:
result : varies based on return_type parameter
"""
def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, backend=None, **kwargs):
"""
Make a box plot of the DataFrame columns.
Parameters:
- column: str or list of str, column name or sequence
- by: str or array-like, column in the DataFrame to group by
- ax: matplotlib axes object, axes object to draw the plot onto
- fontsize: float or str, tick label font size
- rot: int, rotation angle of labels
- grid: bool, setting this to True will show the grid
- figsize: tuple, figure size in inches
- layout: tuple, (rows, columns) for the layout of the plot
- return_type: str, the kind of object to return
- backend: str, backend to use for plotting
Returns:
result : varies based on return_type parameter
"""
def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, rot=0, grid=True, ax=None, figsize=None, layout=None, sharex=False, sharey=True, backend=None, **kwargs):
"""
Make box plots from DataFrameGroupBy data.
Parameters:
- grouped: Grouped DataFrame
- subplots: bool, False - no subplots will be used, True - create a subplot for each group
- column: column name or list of names, or vector
- fontsize: float or str, tick label font size
- rot: int, label rotation angle
- grid: bool, setting this to True will show the grid
- ax: matplotlib axis object
- figsize: tuple, figure size in inches
- layout: tuple, (rows, columns) for the layout of subplots
- sharex: bool, whether to share the x axis between subplots
- sharey: bool, whether to share the y axis between subplots
- backend: str, backend to use for plotting
Returns:
matplotlib.axes.Axes or numpy.ndarray of them
"""

Advanced plotting functions for exploring relationships between multiple variables.
def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, diagonal='hist', marker='.', density_kwds=None, hist_kwds=None, range_padding=0.05, **kwargs):
"""
Draw a matrix of scatter plots.
Parameters:
- frame: DataFrame, data for scatter matrix
- alpha: float, amount of transparency applied
- figsize: tuple (width, height), figure size in inches
- ax: matplotlib axis object
- grid: bool, setting this to True will show the grid
- diagonal: str {'hist', 'kde'}, pick between 'kde' and 'hist' for diagonal plots
- marker: str, matplotlib marker type
- density_kwds: dict, keyword arguments to be passed to kernel density estimate plot
- hist_kwds: dict, keyword arguments to be passed to hist function
- range_padding: float, relative extension of axis range
Returns:
numpy.ndarray: A matrix of scatter plots
"""
def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):
"""
Plot a multidimensional dataset in 2D.
Each Series in the DataFrame is represented as an evenly distributed
slice on a circle. RadViz allows projection of an N-dimensional data set into 2D space.
Parameters:
- frame: DataFrame, object holding the data
- class_column: str, column name containing the name of the data point category
- ax: matplotlib.axes.Axes, a plot instance to which to add the information
- color: list or tuple of str, assign a color to each category
- colormap: str or matplotlib.colors.Colormap, colormap to select colors from
Returns:
matplotlib.axes.Axes
"""
def andrews_curves(frame, class_column, ax=None, samples=200, color=None, colormap=None, **kwargs):
"""
Generate a matplotlib plot for visualizing clusters of multivariate data.
Andrews curves have the functional form:
f(t) = x_1/sqrt(2) + x_2*sin(t) + x_3*cos(t) + x_4*sin(2t) + x_5*cos(2t) + ...
Parameters:
- frame: DataFrame, data to be plotted, preferably normalized to (0.0, 1.0)
- class_column: str, name of the column containing class names
- ax: matplotlib axes object, axes to use
- samples: int, number of points to plot in each curve
- color: list or tuple of str, colors to use for the different classes
- colormap: str or matplotlib colormap object, colormap to select colors from
Returns:
matplotlib.axes.Axes
"""
def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, use_columns=False, xticks=None, colormap=None, axvlines=True, axvlines_kwds=None, sort_labels=False, **kwargs):
"""
Parallel coordinates plotting.
Parameters:
- frame: DataFrame, data for parallel coordinates plot
- class_column: str, column name containing class names
- cols: list, column names to use
- ax: matplotlib axis object
- color: list or tuple, colors to use for the different classes
- use_columns: bool, if true, columns will be used as xticks
- xticks: list or tuple, values to use for xticks
- colormap: str or matplotlib colormap, colormap to use for line colors
- axvlines: bool, if true, vertical lines will be added at each xtick
- axvlines_kwds: dict, options to be passed to axvline method for vertical lines
- sort_labels: bool, sort class_column labels
Returns:
matplotlib.axes.Axes
"""

Specialized plots for analyzing temporal patterns and relationships in time series data.
def lag_plot(series, lag=1, ax=None, **kwds):
"""
Lag plot for time series.
Parameters:
- series: Series, the time series to visualize
- lag: int, lag length of the scatter plot
- ax: matplotlib axis object, the matplotlib axis object to use
Returns:
matplotlib.axes.Axes
"""
def autocorrelation_plot(series, ax=None, **kwargs):
"""
Autocorrelation plot for time series.
The horizontal lines in the plot correspond to 95% and 99% confidence bands.
The dashed line is the 99% confidence band.
Parameters:
- series: Series, the time series to visualize
- ax: matplotlib axis object, the matplotlib axis object to use
Returns:
matplotlib.axes.Axes
"""

Bootstrap resampling visualization for uncertainty estimation.
def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):
"""
Bootstrap plot on mean, median and mid-range statistics.
The bootstrap plot is used to estimate the uncertainty of a statistic
by relying on random sampling with replacement.
Parameters:
- series: Series, series from where to get the samplings for the bootstrapping
- fig: matplotlib.figure.Figure, if given, it will use the fig reference for plotting
- size: int, number of data points to consider during each sampling
- samples: int, number of times the bootstrap procedure is performed
Returns:
matplotlib.figure.Figure
"""

Display utilities and matplotlib converter management.
def table(ax, data, **kwargs):
"""
Helper function to convert DataFrame and Series to matplotlib.table.
Parameters:
- ax: matplotlib axes object, axes to draw table on
- data: DataFrame or Series, data for table contents
- **kwargs: keyword arguments passed to matplotlib.table.table
Returns:
matplotlib.table.Table: matplotlib table object
"""
def register_matplotlib_converters():
"""
Register pandas formatters and converters with matplotlib.
This function modifies the global matplotlib.units.registry dictionary.
Pandas adds custom converters for pd.Timestamp, pd.Period, np.datetime64,
datetime.datetime, datetime.date, and datetime.time.
"""
def deregister_matplotlib_converters():
"""
Remove pandas formatters and converters.
Removes the custom converters added by register_matplotlib_converters.
This attempts to set the state of the registry back to the state before
pandas registered its own units.
"""

Configuration options and styling utilities for customizing plot appearance.
class _Options(dict):
"""
Stores pandas plotting options.
Allows for parameter aliasing so you can use parameter names that are
the same as the plot function parameters, stored in canonical format.
"""
def use(self, key, value):
"""
Temporarily set a parameter value using the with statement.
Parameters:
- key: str, parameter name (aliasing allowed)
- value: any, parameter value to set temporarily
Returns:
context manager for temporary parameter setting
"""
def reset(self):
"""Reset the option store to its initial state."""
# Global plot parameters object
plot_params = _Options()

# Plot kind constants
class PlotKind:
LINE = 'line'
BAR = 'bar'
BARH = 'barh'
HIST = 'hist'
BOX = 'box'
KDE = 'kde'
DENSITY = 'density'
AREA = 'area'
PIE = 'pie'
SCATTER = 'scatter'
HEXBIN = 'hexbin'
# Diagonal plot options for scatter_matrix
class DiagonalKind:
HIST = 'hist'
KDE = 'kde'
# Plot accessor class
class PlotAccessor:
"""Plotting accessor for Series and DataFrame objects."""
def __init__(self, data): ...
def __call__(self, *args, **kwargs): ...
# Individual plot methods
line: Callable
bar: Callable
barh: Callable
box: Callable
hist: Callable
kde: Callable
density: Callable
area: Callable
pie: Callable
scatter: Callable # DataFrame only
hexbin: Callable # DataFrame only
# Matplotlib integration types
from matplotlib.axes import Axes
from matplotlib.figure import Figure
from matplotlib.table import Table
from matplotlib.colors import Colormap
import numpy as np
# Return types for plotting functions
PlotResult = Axes | np.ndarray | Figure | Table

Install with Tessl CLI
npx tessl i tessl/pypi-pandas