A Grammar of Graphics for Python providing a declarative approach to data visualization similar to R's ggplot2
—
Statistical transformations (stats) transform your data before visualization through operations like binning, density estimation, smoothing, and statistical summaries. Stats compute new variables that can be mapped to aesthetics, enabling sophisticated data visualizations that go beyond raw data plotting. Each stat has computed aesthetics that provide access to the transformed values.
Basic transformations including pass-through data and counting operations.
def stat_identity(mapping=None, data=None, **kwargs):
"""
Identity transformation (no change to data).
Use this when you want to plot data as-is without any statistical transformation.
"""
def stat_count(mapping=None, data=None, **kwargs):
"""
Count the number of observations at each x position.
Required aesthetics: x
Optional aesthetics: weight
Computed aesthetics:
- count: number of observations
- prop: proportion of total observations
"""
def stat_sum(mapping=None, data=None, **kwargs):
"""
Sum overlapping points and map sum to size.
Required aesthetics: x, y
Optional aesthetics: size, weight
Computed aesthetics:
- n: sum of weights (or count if no weights)
- prop: proportion of total
"""
def stat_unique(mapping=None, data=None, **kwargs):
"""
Remove duplicate rows in data.
Useful for preventing overplotting when you have duplicate points.
"""

Transform continuous data into discrete bins for histograms and related visualizations.
def stat_bin(mapping=None, data=None, bins=30, binwidth=None, center=None,
boundary=None, closed='right', pad=False, **kwargs):
"""
Bin data for histograms.
Required aesthetics: x
Optional aesthetics: weight
Parameters:
- bins: int, number of bins
- binwidth: float, width of bins
- center: float, center of one bin
- boundary: float, boundary of one bin
- closed: str, which side of interval is closed ('right', 'left')
- pad: bool, whether to pad bins
Computed aesthetics:
- count: number of observations in bin
- density: density of observations
- ncount: normalized count
- ndensity: normalized density
- width: bin width
"""
def stat_bin_2d(mapping=None, data=None, bins=30, binwidth=None, drop=True,
**kwargs):
"""
2D binning for heatmaps.
Required aesthetics: x, y
Optional aesthetics: weight
Parameters:
- bins: int or tuple, number of bins in each direction
- binwidth: float or tuple, width of bins
- drop: bool, whether to drop empty bins
Computed aesthetics:
- count: number of observations in bin
- density: density of observations
"""
def stat_bin2d(mapping=None, data=None, bins=30, binwidth=None, drop=True,
**kwargs):
"""
2D binning for heatmaps - alternative name for stat_bin_2d.
Required aesthetics: x, y
Optional aesthetics: weight
Parameters:
- bins: int or tuple, number of bins in each direction
- binwidth: float or tuple, width of bins
- drop: bool, whether to drop empty bins
Computed aesthetics:
- count: number of observations in bin
- density: density of observations
"""
def stat_bindot(mapping=None, data=None, binaxis='x', method='dotdensity',
binwidth=None, **kwargs):
"""
Bin data for dot plots.
Required aesthetics: x
Parameters:
- binaxis: str, axis to bin along ('x', 'y')
- method: str, binning method ('dotdensity', 'histodot')
- binwidth: float, width of bins
Computed aesthetics:
- count: number of observations in bin
- binwidth: width of bin
"""

Compute smooth density estimates for continuous distributions.
def stat_density(mapping=None, data=None, bw='nrd0', adjust=1, kernel='gaussian',
n=512, trim=False, **kwargs):
"""
Compute smooth density estimates.
Required aesthetics: x
Optional aesthetics: weight
Parameters:
- bw: str or float, bandwidth selection method or value
- adjust: float, bandwidth adjustment factor
- kernel: str, kernel function ('gaussian', 'epanechnikov', etc.)
- n: int, number of evaluation points
- trim: bool, whether to trim density to data range
Computed aesthetics:
- density: density estimate
- count: density * number of observations
- scaled: density scaled to maximum of 1
"""
def stat_density_2d(mapping=None, data=None, **kwargs):
"""
2D density estimation for contour plots.
Required aesthetics: x, y
Optional aesthetics: weight
Computed aesthetics:
- level: contour level
- piece: contour piece identifier
"""
def stat_ydensity(mapping=None, data=None, **kwargs):
"""
Density estimates for violin plots.
Required aesthetics: x, y
Computed aesthetics:
- density: density estimate
- scaled: density scaled within groups
- count: density * number of observations
- violinwidth: density scaled for violin width
"""

Fit smooth curves and trend lines to data.
def stat_smooth(mapping=None, data=None, method='auto', formula=None, se=True,
n=80, span=0.75, level=0.95, **kwargs):
"""
Compute smoothed conditional means.
Required aesthetics: x, y
Optional aesthetics: weight
Parameters:
- method: str, smoothing method ('auto', 'lm', 'glm', 'gam', 'loess')
- formula: str, model formula (for 'lm', 'glm', 'gam')
- se: bool, whether to compute confidence interval
- n: int, number of points to evaluate
- span: float, smoothing span (for 'loess')
- level: float, confidence level
Computed aesthetics:
- y: predicted values
- ymin, ymax: confidence interval bounds (if se=True)
- se: standard errors
"""
def stat_quantile(mapping=None, data=None, quantiles=None, formula=None,
**kwargs):
"""
Compute quantile regression lines.
Required aesthetics: x, y
Optional aesthetics: weight
Parameters:
- quantiles: list, quantiles to compute (default: [0.25, 0.5, 0.75])
- formula: str, model formula
Computed aesthetics:
- quantile: quantile level
"""

Compute statistical summaries for box plots and related visualizations.
def stat_boxplot(mapping=None, data=None, coef=1.5, **kwargs):
"""
Compute box plot statistics.
Required aesthetics: x, y (one discrete, one continuous)
Optional aesthetics: weight
Parameters:
- coef: float, multiplier for outlier detection
Computed aesthetics:
- lower: lower hinge (25th percentile)
- upper: upper hinge (75th percentile)
- middle: median (50th percentile)
- ymin: lower whisker
- ymax: upper whisker
- outliers: outlier values
"""
def stat_summary(mapping=None, data=None, fun_data=None, fun_y=None,
fun_ymax=None, fun_ymin=None, **kwargs):
"""
Summarize y values at each x.
Required aesthetics: x, y
Parameters:
- fun_data: function, returns dict with summary statistics
- fun_y: function, compute y summary
- fun_ymax, fun_ymin: functions, compute y range
Computed aesthetics depend on functions used:
- y: summary statistic
- ymin, ymax: range statistics (if computed)
"""
def stat_summary_bin(mapping=None, data=None, bins=30, **kwargs):
"""
Summarize y values in bins of x.
Required aesthetics: x, y
Parameters:
- bins: int, number of bins
- fun_data, fun_y, fun_ymax, fun_ymin: summary functions (not named in the signature; passed through **kwargs)
Computed aesthetics:
- x: bin centers
- y: summary statistic
- ymin, ymax: range statistics (if computed)
"""

Compute geometric transformations and spatial statistics.
def stat_hull(mapping=None, data=None, **kwargs):
"""
Compute convex hull of points.
Required aesthetics: x, y
Optional aesthetics: group
Returns hull vertices in order for drawing polygon.
"""
def stat_ellipse(mapping=None, data=None, type='t', level=0.95, segments=51,
**kwargs):
"""
Compute confidence ellipses.
Required aesthetics: x, y
Parameters:
- type: str, ellipse type ('t', 'norm', 'euclid')
- level: float, confidence level
- segments: int, number of points in ellipse
Computed aesthetics:
- x, y: ellipse boundary points
"""
def stat_sina(mapping=None, data=None, **kwargs):
"""
Compute sina plot positions (jittered violin).
Required aesthetics: x, y
Positions points based on local density to create violin-like shape
with individual points visible.
"""

Work with probability distributions and cumulative distributions.
def stat_ecdf(mapping=None, data=None, n=None, pad=True, **kwargs):
"""
Compute empirical cumulative distribution function.
Required aesthetics: x
Parameters:
- n: int, number of points to evaluate (default: use all data points)
- pad: bool, whether to pad with additional points
Computed aesthetics:
- y: cumulative probability
"""
def stat_qq(mapping=None, data=None, distribution='norm', dparams=None, **kwargs):
"""
Compute quantile-quantile plot statistics.
Required aesthetics: sample
Parameters:
- distribution: str or scipy distribution, theoretical distribution
- dparams: tuple, distribution parameters
Computed aesthetics:
- theoretical: theoretical quantiles
- sample: sample quantiles
"""
def stat_qq_line(mapping=None, data=None, distribution='norm', dparams=None,
**kwargs):
"""
Compute reference line for Q-Q plots.
Required aesthetics: sample
Parameters:
- distribution: str or scipy distribution, theoretical distribution
- dparams: tuple, distribution parameters
Computed aesthetics:
- slope, intercept: line parameters
"""

Evaluate functions and compute point densities.
def stat_function(mapping=None, data=None, fun=None, xlim=None, n=101,
args=None, **kwargs):
"""
Evaluate and plot functions.
Parameters:
- fun: function, function to evaluate
- xlim: tuple, x range to evaluate over
- n: int, number of points to evaluate
- args: tuple, additional arguments to function
Computed aesthetics:
- x: evaluation points
- y: function values
"""
def stat_pointdensity(mapping=None, data=None, **kwargs):
"""
Compute local point density.
Required aesthetics: x, y
Computed aesthetics:
- density: local point density
- ndensity: normalized density
"""

# Map fill to computed count in histogram
ggplot(data, aes(x='value')) + \
geom_histogram(aes(fill=after_stat('count')), stat='bin', bins=20)
# Use density instead of count for histogram
ggplot(data, aes(x='value')) + \
geom_histogram(aes(y=after_stat('density')), stat='bin', bins=20)
# Color points by local density
ggplot(data, aes(x='x', y='y')) + \
geom_point(aes(color=after_stat('density')), stat='pointdensity')

# Custom summary function
def mean_se(x):
    """
    Summarize values as their mean plus/minus one standard error.

    Parameters:
    - x: sequence of numeric values belonging to one group

    Returns:
    - dict with keys 'y' (the mean), 'ymin' (mean - SE) and 'ymax' (mean + SE)

    With fewer than two observations the sample standard deviation is
    undefined, so 'ymin' and 'ymax' come back as NaN.
    """
    center = np.mean(x)
    # The standard error of the mean is built on the *sample* standard
    # deviation (ddof=1). np.std defaults to ddof=0 (population form),
    # which understates the error, noticeably so for small groups.
    std_err = np.std(x, ddof=1) / np.sqrt(len(x))
    return {'y': center, 'ymin': center - std_err, 'ymax': center + std_err}
ggplot(data, aes(x='group', y='value')) + \
stat_summary(fun_data=mean_se, geom='pointrange')

# Density curve with rug plot
ggplot(data, aes(x='value')) + \
stat_density(geom='line') + \
geom_rug(sides='b')
# Smooth with confidence band
ggplot(data, aes(x='x', y='y')) + \
geom_point(alpha=0.5) + \
stat_smooth(method='lm', se=True)

Install with Tessl CLI:
npx tessl i tessl/pypi-plotnine