A pytest fixture for benchmarking code that automatically calibrates test runs for accurate performance measurements.
—
pytest-benchmark provides robust storage backends for persisting benchmark results and powerful comparison capabilities for tracking performance over time. Results can be stored in files or Elasticsearch and compared across different runs, commits, or environments.
class FileStorage:
"""File-based storage backend for benchmark results."""
def __init__(self, path: str, logger, default_machine_id: str = None):
"""
Initialize file storage.
Args:
path: Directory path for storing benchmark files
logger: Logger instance for output
default_machine_id: Default machine identifier
"""
def save(self, output_json: dict, save: str) -> str:
"""
Save benchmark results to file.
Args:
output_json: Benchmark data in JSON format
save: Save identifier/name
Returns:
str: Path to saved file
"""
def load(self, name: str) -> dict:
"""
Load benchmark results from file.
Args:
name: File identifier to load
Returns:
dict: Loaded benchmark data
"""class ElasticsearchStorage:
"""Elasticsearch storage backend for benchmark results."""
def __init__(self, hosts: list, index: str, doctype: str, project_name: str = None, logger=None, **kwargs):
"""
Initialize Elasticsearch storage.
Args:
hosts: List of Elasticsearch host URLs
index: Index name for storing benchmarks
doctype: Document type (deprecated in ES 7+)
project_name: Project identifier
logger: Logger instance
**kwargs: Additional Elasticsearch client options
"""
def save(self, output_json: dict, save: str) -> str:
"""Save benchmark results to Elasticsearch."""
def load(self, name: str) -> dict:
"""Load benchmark results from Elasticsearch."""# Default file storage
--benchmark-storage=file://./.benchmarks
# Absolute path
--benchmark-storage=file:///home/user/benchmarks
# Relative path
--benchmark-storage=file://./results/benchmarks

# Basic Elasticsearch
--benchmark-storage=elasticsearch+http://localhost:9200/benchmarks/results
# With authentication
--benchmark-storage=elasticsearch+https://user:pass@host:9200/index/doctype
# Multiple hosts
--benchmark-storage=elasticsearch+http://host1:9200,host2:9200/index/doctype
# With project name
--benchmark-storage=elasticsearch+http://host:9200/index/doctype?project_name=myproject

# Save with custom name
pytest --benchmark-save=baseline
# Save with descriptive name
pytest --benchmark-save=feature-x-implementation
# Auto-save with timestamp
pytest --benchmark-autosave

def test_with_custom_save(benchmark):
def my_function():
return sum(range(1000))
result = benchmark(my_function)
# Results automatically saved if --benchmark-save is used
assert result == 499500

# Save only statistics (default)
pytest --benchmark-save=baseline
# Save complete timing data
pytest --benchmark-save=baseline --benchmark-save-data

# Compare against latest saved
pytest --benchmark-compare
# Compare against specific run
pytest --benchmark-compare=baseline
pytest --benchmark-compare=0001
# Compare with failure thresholds
pytest --benchmark-compare=baseline --benchmark-compare-fail=mean:10%

# List available runs
pytest-benchmark list
# Compare specific runs
pytest-benchmark compare 0001 0002
# Compare with filters
pytest-benchmark compare 'Linux-CPython-3.9-64bit/*'
# Display comparison table
pytest-benchmark compare --help

# First, establish baseline
pytest --benchmark-save=baseline tests/
# Later, compare new implementation
pytest --benchmark-compare=baseline tests/

# In CI pipeline
# 1. Run benchmarks and save
pytest --benchmark-only --benchmark-save=commit-${BUILD_ID}
# 2. Compare against master baseline
pytest --benchmark-only --benchmark-compare=master-baseline \
--benchmark-compare-fail=mean:15%

# Save results for different Python versions
pytest --benchmark-save=python38 tests/
pytest --benchmark-save=python39 tests/
pytest --benchmark-save=python310 tests/
# Compare across versions
pytest-benchmark compare python38 python39 python310

# Threshold expression formats:
"mean:5%" # Mean increased by more than 5%
"min:0.001" # Min increased by more than 1ms
"max:10%" # Max increased by more than 10%
"stddev:25%" # Standard deviation increased by 25%# Multiple failure conditions
pytest --benchmark-compare=baseline \
--benchmark-compare-fail=mean:10% \
--benchmark-compare-fail=max:20% \
--benchmark-compare-fail=min:0.005

def test_performance_sensitive_function(benchmark):
def critical_function():
# This function's performance is critical
return sum(x**2 for x in range(10000))
result = benchmark(critical_function)
assert result == 333283335000
# Run with regression detection
# pytest --benchmark-compare=baseline --benchmark-compare-fail=mean:5%

def pytest_benchmark_generate_machine_info() -> dict:
"""
Generate machine information for benchmark context.
Returns:
dict: Machine information including:
- node: Machine hostname
- processor: Processor name
- machine: Machine architecture
- python_implementation: CPython/PyPy/etc
- python_version: Python version
- system: Operating system
- cpu: CPU information from py-cpuinfo
"""# Benchmarks warn if machine info differs
pytest --benchmark-compare=baseline
# Warning: Benchmark machine_info is different. Current: {...} VS saved: {...}

.benchmarks/
├── Linux-CPython-3.9-64bit/
│ ├── 0001_baseline.json
│ ├── 0002_feature_x.json
│ └── 0003_master.json
└── machine_info.json

{
"_index": "benchmarks",
"_type": "results",
"_id": "0001_baseline",
"_source": {
"machine_info": {...},
"commit_info": {...},
"benchmarks": [...],
"datetime": "2023-01-01T12:00:00Z",
"version": "5.1.0"
}
}

# Export with full timing data
pytest --benchmark-json=complete.json --benchmark-save-data

# Complete benchmark JSON format:
{
"machine_info": {
"node": str,
"processor": str,
"machine": str,
"python_implementation": str,
"python_version": str,
"system": str,
"cpu": dict
},
"commit_info": {
"id": str,
"time": str,
"author_time": str,
"author_name": str,
"author_email": str,
"message": str,
"branch": str
},
"benchmarks": [
{
"group": str,
"name": str,
"fullname": str,
"params": dict,
"param": str,
"extra_info": dict,
"stats": {
"min": float,
"max": float,
"mean": float,
"stddev": float,
"rounds": int,
"median": float,
"iqr": float,
"q1": float,
"q3": float,
"iqr_outliers": int,
"stddev_outliers": int,
"outliers": str,
"ld15iqr": float,
"hd15iqr": float,
"ops": float,
"total": float
},
"data": [float, ...] # If --benchmark-save-data used
}
],
"datetime": str,
"version": str
}

def pytest_benchmark_generate_commit_info(config):
"""Custom commit info generation."""
return {
"id": "custom-build-123",
"branch": "feature/optimization",
"message": "Performance improvements",
"time": "2023-01-01T12:00:00Z"
}

# Using netrc for Elasticsearch auth
echo "machine elasticsearch.example.com login user password secret" >> ~/.netrc
pytest --benchmark-storage=elasticsearch+https://elasticsearch.example.com:9200/bench/result \
--benchmark-netrc=~/.netrc

# Compare only specific test patterns
pytest-benchmark compare baseline current --benchmark-filter="*string*"
# Compare specific groups
pytest-benchmark compare baseline current --group="database"

# Check storage connectivity
pytest --benchmark-storage=file://./test-storage --benchmark-save=test
# Verify Elasticsearch connection
pytest --benchmark-storage=elasticsearch+http://localhost:9200/test/bench \
--benchmark-save=connectivity-test

# Debug comparison issues
pytest --benchmark-compare=baseline --benchmark-verbose
# List available runs for comparison
pytest-benchmark list --storage=file://.benchmarks

Install with Tessl CLI
npx tessl i tessl/pypi-pytest-benchmark