A pytest fixture for benchmarking code that automatically calibrates test runs for accurate performance measurements.
—
pytest-benchmark provides comprehensive statistical analysis of benchmark results with multiple output formats including tables, CSV exports, histograms, and cProfile integration. The statistical engine computes various measures of central tendency, variability, and outlier detection.
class Stats:
    """Statistical calculations for benchmark timing data.

    Raw timing samples are collected with :meth:`update`; all derived
    statistics are exposed as read-only properties.  Most properties
    assume at least one sample has been recorded.

    NOTE(review): quartiles here use linear interpolation on the sorted
    data (NumPy-style "linear" method) — confirm against the reference
    implementation if exact quartile parity matters.
    """

    # Names of all exported statistics, in report order.
    fields = (
        'min', 'max', 'mean', 'stddev', 'rounds', 'median',
        'iqr', 'q1', 'q3', 'iqr_outliers', 'stddev_outliers',
        'outliers', 'ld15iqr', 'hd15iqr', 'ops', 'total',
    )

    def __init__(self):
        """Initialize with empty data list."""
        self.data: list[float] = []

    def update(self, duration: float) -> None:
        """Add a timing measurement to the dataset."""
        self.data.append(duration)

    def as_dict(self) -> dict:
        """Return all statistics as a dictionary keyed by ``fields``."""
        return {field: getattr(self, field) for field in self.fields}

    def _percentile(self, fraction: float) -> float:
        """Percentile via linear interpolation on the sorted samples."""
        ordered = sorted(self.data)
        if not ordered:
            return 0.0
        pos = (len(ordered) - 1) * fraction
        lower = int(pos)
        upper = min(lower + 1, len(ordered) - 1)
        return ordered[lower] + (pos - lower) * (ordered[upper] - ordered[lower])

    @property
    def min(self) -> float:
        """Minimum execution time in seconds."""
        return min(self.data)

    @property
    def max(self) -> float:
        """Maximum execution time in seconds."""
        return max(self.data)

    @property
    def mean(self) -> float:
        """Arithmetic mean execution time in seconds."""
        return sum(self.data) / len(self.data)

    @property
    def median(self) -> float:
        """Median execution time in seconds."""
        return self._percentile(0.5)

    @property
    def stddev(self) -> float:
        """Sample standard deviation in seconds (0.0 for fewer than 2 samples)."""
        if len(self.data) < 2:
            return 0.0
        m = self.mean
        return (sum((x - m) ** 2 for x in self.data) / (len(self.data) - 1)) ** 0.5

    @property
    def rounds(self) -> int:
        """Number of timing rounds executed."""
        return len(self.data)

    @property
    def total(self) -> float:
        """Total execution time across all rounds in seconds."""
        return sum(self.data)

    @property
    def q1(self) -> float:
        """First quartile (25th percentile) in seconds."""
        return self._percentile(0.25)

    @property
    def q3(self) -> float:
        """Third quartile (75th percentile) in seconds."""
        return self._percentile(0.75)

    @property
    def iqr(self) -> float:
        """Interquartile range (Q3 - Q1) in seconds."""
        return self.q3 - self.q1

    @property
    def ld15iqr(self) -> float:
        """Lowest datum within 1.5 IQR of Q1 (Tukey's method)."""
        fence = self.q1 - 1.5 * self.iqr
        return min(x for x in self.data if x >= fence)

    @property
    def hd15iqr(self) -> float:
        """Highest datum within 1.5 IQR of Q3 (Tukey's method)."""
        fence = self.q3 + 1.5 * self.iqr
        return max(x for x in self.data if x <= fence)

    @property
    def iqr_outliers(self) -> int:
        """Count of outliers beyond 1.5 IQR from the quartiles."""
        low = self.q1 - 1.5 * self.iqr
        high = self.q3 + 1.5 * self.iqr
        return sum(1 for x in self.data if x < low or x > high)

    @property
    def stddev_outliers(self) -> int:
        """Count of outliers beyond one standard deviation from the mean."""
        m, sd = self.mean, self.stddev
        return sum(1 for x in self.data if abs(x - m) > sd)

    @property
    def outliers(self) -> str:
        """Formatted string 'STDDEV_OUTLIERS;IQR_OUTLIERS' describing outlier counts."""
        return f"{self.stddev_outliers};{self.iqr_outliers}"

    @property
    def ops(self) -> float:
        """Operations per second (1 / mean); 0.0 when mean is 0."""
        m = self.mean
        return 1.0 / m if m else 0.0
"""Operations per second (1 / mean)."""
class TableResults:
"""Formats benchmark results as console tables."""
def __init__(self, benchmarks: list, columns: list, sort_key: str, logger, scale_unit: callable):
"""
Initialize table formatter.
Args:
benchmarks: List of benchmark result objects
columns: List of column names to display
sort_key: Column to sort results by
logger: Logger for output
scale_unit: Function to determine time unit scaling
"""
def display(self, tr) -> None:
"""Display formatted table to terminal."""
# Available table columns:
# Column identifiers accepted by --benchmark-columns, with their meanings.
COLUMNS = [
    'min',         # Minimum time
    'max',         # Maximum time
    'mean',        # Mean time
    'stddev',      # Standard deviation
    'median',      # Median time
    'iqr',         # Interquartile range
    'outliers',    # Outlier summary
    'ops',         # Operations per second
    'rounds',      # Number of rounds
    'iterations',  # Iterations per round
]
# Default table output
pytest --benchmark-only
# Custom columns
pytest --benchmark-columns=min,max,mean,ops,rounds
# Sort by different metric
pytest --benchmark-sort=ops
# Group results differently
pytest --benchmark-group-by=func
class CSVResults:
"""Export benchmark results to CSV format."""
def __init__(self, benchmarks: list, filename: str, logger):
"""
Initialize CSV exporter.
Args:
benchmarks: List of benchmark results
filename: Output CSV filename
logger: Logger instance
"""
def save(self) -> None:
"""Save results to CSV file."""
# Export to CSV
pytest --benchmark-csv=results.csv
# CSV with timestamp
pytest --benchmark-csv=benchmark_$(date +%Y%m%d_%H%M%S).csv
# Multiple exports
pytest --benchmark-csv=summary.csv --benchmark-json=detailed.json
# Example CSV output structure:
name,min,max,mean,stddev,rounds,median,iqr,q1,q3,iqr_outliers,stddev_outliers,ops,total
test_function[param1],0.001,0.002,0.0015,0.0003,10,0.0014,0.0004,0.0012,0.0016,0,1,666.67,0.015
test_function[param2],0.002,0.003,0.0025,0.0004,10,0.0024,0.0005,0.0021,0.0026,1,0,400.0,0.025
# Histogram generation requires pygal and pygaljs
# Install with: pip install pytest-benchmark[histogram]
def generate_histogram(benchmarks: list, filename_prefix: str) -> None:
    """
    Generate SVG histograms for benchmark results.

    Args:
        benchmarks: List of benchmark results
        filename_prefix: Prefix for output SVG files

    Note:
        Documentation stub — the real implementation renders SVG charts
        via pygal/pygaljs (optional dependency, see install note above);
        it is intentionally not implemented here.
    """


# Generate histograms
pytest --benchmark-histogram=charts
# Custom prefix with path
pytest --benchmark-histogram=results/benchmark_charts
# Histograms with comparison
pytest --benchmark-compare=baseline --benchmark-histogram=comparison
# Generated files:
charts-test_function.svg # Individual test histogram
charts-comparison.svg # Comparison chart (if --benchmark-compare used)
# Enable cProfile with sort column
--benchmark-cprofile COLUMN
# Available sort columns:
--benchmark-cprofile ncalls # Number of calls
--benchmark-cprofile ncalls_recursion # Calls including recursion
--benchmark-cprofile tottime # Total time excluding subcalls
--benchmark-cprofile tottime_per # Total time per call
--benchmark-cprofile cumtime # Cumulative time including subcalls
--benchmark-cprofile cumtime_per # Cumulative time per call
--benchmark-cprofile function_name # Function name
# Control profiling behavior
--benchmark-cprofile-loops LOOPS # Iterations to profile (default: 1)
--benchmark-cprofile-top COUNT # Top N functions to display (default: 25)
--benchmark-cprofile-dump PREFIX # Save profile dumps to files
# Basic profiling sorted by cumulative time
pytest --benchmark-cprofile=cumtime
# Detailed profiling with more functions shown
pytest --benchmark-cprofile=tottime --benchmark-cprofile-top=50
# Save profile dumps for external analysis
pytest --benchmark-cprofile=cumtime --benchmark-cprofile-dump=profiles
def test_profile_example(benchmark):
def complex_function():
# This will be profiled
data = [x**2 for x in range(10000)]
return sum(data)
result = benchmark(complex_function)
assert result == 333283335000
# Command: pytest --benchmark-cprofile=cumtime --benchmark-cprofile-top=10
# Output shows top functions by cumulative time
def time_unit(seconds: float) -> tuple[str, float]:
    """
    Automatically select appropriate time unit.

    Args:
        seconds: Time value in seconds

    Returns:
        tuple: (unit_symbol, scale_factor)

    Examples:
        time_unit(0.000001) -> ('u', 1000000) # microseconds
        time_unit(0.001) -> ('m', 1000) # milliseconds
        time_unit(1.0) -> ('', 1) # seconds
    """
    # Thresholds chosen so the docstring examples hold exactly:
    # below 1 us -> ns, below 1 ms -> us, below 1 s -> ms, else seconds.
    if seconds < 1e-6:
        return ('n', 1000000000)
    if seconds < 1e-3:
        return ('u', 1000000)
    if seconds < 1.0:
        return ('m', 1000)
    return ('', 1)
# Force specific time units
pytest --benchmark-time-unit=ns # nanoseconds
pytest --benchmark-time-unit=us # microseconds
pytest --benchmark-time-unit=ms # milliseconds
pytest --benchmark-time-unit=s # seconds
pytest --benchmark-time-unit=auto # automatic (default)
class Metadata:
"""Container for benchmark metadata and statistics."""
def __init__(self, fixture, iterations: int, options: dict):
"""
Initialize benchmark metadata.
Args:
fixture: BenchmarkFixture instance
iterations: Number of iterations per round
options: Benchmark configuration options
"""
def as_dict(self, include_data: bool = False) -> dict:
"""
Export metadata as dictionary.
Args:
include_data: Whether to include raw timing data
Returns:
dict: Complete benchmark metadata
"""
# Benchmark context automatically includes:
{
"name": str, # Test function name
"fullname": str, # Full pytest node ID
"group": str, # Benchmark group
"params": dict, # Test parameters
"param": str, # Parameter string
"extra_info": dict, # Additional metadata
"stats": dict, # Statistical measures
"options": dict # Benchmark options used
}
def pytest_benchmark_generate_json(config, benchmarks, include_data, machine_info, commit_info) -> dict:
"""
Generate JSON report data.
Args:
config: pytest configuration
benchmarks: List of benchmark results
include_data: Whether to include raw timing data
machine_info: Machine information dict
commit_info: Git commit information dict
Returns:
dict: Complete JSON report structure
"""
def pytest_benchmark_group_stats(config, benchmarks, group_by):
"""Custom grouping logic for result display."""
def pytest_benchmark_scale_unit(config, unit, benchmarks, best, worst, sort):
"""Custom unit scaling for result display."""
def test_trend_analysis(benchmark):
"""Example of capturing trend data."""
def algorithm_v1():
return sum(x**2 for x in range(1000))
result = benchmark(algorithm_v1)
# Results automatically include statistical analysis:
# - Mean execution time with confidence intervals
# - Outlier detection and classification
# - Operations per second calculation
# - Comparison with previous runs (if --benchmark-compare used)
assert result == 332833500
def test_statistical_validation(benchmark):
"""Validate statistical measures."""
def consistent_function():
# Function with predictable performance
return sum(range(100))
result = benchmark(consistent_function)
# Access statistics after benchmarking
stats = benchmark.stats
assert stats.min > 0
assert stats.max >= stats.min
assert stats.mean >= stats.min
assert stats.stddev >= 0
assert stats.rounds >= 1
assert result == 4950
# Generate comparative reports
pytest --benchmark-save=implementation_a tests/
pytest --benchmark-save=implementation_b tests/
pytest-benchmark compare implementation_a implementation_b --csv=comparison.csv
def test_calibration_debugging(benchmark):
def micro_function():
return 42
# Very fast functions may have calibration challenges
# Use pedantic mode for precise control
result = benchmark.pedantic(
target=micro_function,
rounds=1000, # Many rounds for statistical significance
iterations=10000 # Many iterations per round
)
assert result == 42
# Debug timer precision issues
pytest --benchmark-verbose --benchmark-calibration-precision=100
def test_outlier_analysis(benchmark):
def variable_function():
# Function with variable performance
import random
time.sleep(random.uniform(0.001, 0.002))
return sum(range(100))
result = benchmark(variable_function)
# Check outlier statistics
stats = benchmark.stats
if stats.iqr_outliers > stats.rounds * 0.1:
# More than 10% outliers - investigate environment
pass
assert result == 4950
Install with Tessl CLI
npx tessl i tessl/pypi-pytest-benchmark