CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-pytest-benchmark

A pytest fixture for benchmarking code that automatically calibrates test runs for accurate performance measurements.

Pending
Overview
Eval results
Files

docs/core-benchmarking.md

Core Benchmarking

Overview

The core benchmarking functionality provides the primary benchmark fixture that automatically calibrates test runs for accurate performance measurements. It offers two main modes: automatic benchmarking with benchmark() and pedantic mode with benchmark.pedantic() for fine-grained control.

Imports

# The benchmark fixture is automatically available in pytest tests
import pytest

# For programmatic access to benchmarking classes (rarely needed)
from pytest_benchmark.fixture import BenchmarkFixture, FixtureAlreadyUsed

Core APIs

benchmark fixture

@pytest.fixture
def benchmark(request) -> BenchmarkFixture:
    """
    Primary pytest fixture for benchmarking functions.

    This is an API reference stub; the real fixture is provided by the
    pytest-benchmark plugin and is available in any test automatically.

    Args:
        request: The pytest fixture request object for the current test.

    Returns:
        BenchmarkFixture: The benchmark fixture instance for the current test.
    """

BenchmarkFixture.__call__

def __call__(self, function_to_benchmark, *args, **kwargs) -> Any:
    """
    Benchmark a function with automatic calibration and timing.

    The number of iterations per round is determined automatically so
    that each measurement round meets the timer's precision thresholds.

    Args:
        function_to_benchmark: The function to benchmark
        *args: Positional arguments to pass to the function
        **kwargs: Keyword arguments to pass to the function

    Returns:
        Any: The return value of the benchmarked function

    Raises:
        FixtureAlreadyUsed: If the fixture has already been used in this test
    """

BenchmarkFixture.pedantic

def pedantic(self, target, args=(), kwargs=None, setup=None, rounds=1, warmup_rounds=0, iterations=1) -> Any:
    """
    Benchmark with precise control over execution parameters.

    Unlike the automatic mode, no calibration is performed: the exact
    rounds/iterations you specify are executed.

    Args:
        target: The function to benchmark
        args: Tuple of positional arguments (default: ())
        kwargs: Dict of keyword arguments (default: None)
        setup: Setup function called before each round (default: None).
            If it returns a value, that value must be an ``(args, kwargs)``
            pair used to call the target.
        rounds: Number of measurement rounds (default: 1)
        warmup_rounds: Number of warmup rounds (default: 0)
        iterations: Number of iterations per round (default: 1)

    Returns:
        Any: The return value of the benchmarked function

    Raises:
        ValueError: If iterations, rounds, or warmup_rounds are invalid
        TypeError: If setup returns arguments when args/kwargs are also provided
        FixtureAlreadyUsed: If the fixture has already been used in this test
    """

BenchmarkFixture.weave

def weave(self, target, **kwargs) -> None:
    """
    Apply benchmarking to a target function using aspect-oriented programming.

    Also exposed under the alias ``patch`` (see below).

    Args:
        target: The function, method, or object to benchmark
        **kwargs: Additional arguments passed to aspectlib.weave()

    Raises:
        ImportError: If aspectlib is not installed
        FixtureAlreadyUsed: If the fixture has already been used in this test
    """

BenchmarkFixture.patch

# Alias for weave method
patch = weave

BenchmarkFixture Properties and Attributes

@property
def enabled(self) -> bool:
    """Whether benchmarking is enabled for this test (i.e. not disabled, e.g. via command-line options)."""

# Instance attributes (not properties)
name: str                     # The test function name
fullname: str                 # The full test node ID  
param: str | None             # Test parametrization ID if parametrized, None otherwise
params: dict | None           # Test parametrization parameters if parametrized, None otherwise
group: str | None             # Benchmark group name if specified, None otherwise
has_error: bool               # Whether the benchmark encountered an error
extra_info: dict              # Additional benchmark information
stats: 'Metadata' | None      # Benchmark statistics after execution, None before execution

Usage Examples

Basic Function Benchmarking

def fibonacci(n):
    if n < 2:
        return n
    return fibonacci(n-1) + fibonacci(n-2)

def test_fibonacci_benchmark(benchmark):
    # Automatic calibration determines optimal iterations
    result = benchmark(fibonacci, 20)
    assert result == 6765

Benchmarking with Arguments

def string_operations(text, count):
    result = []
    for i in range(count):
        result.append(text.upper().replace(' ', '_'))
    return result

def test_string_operations(benchmark):
    result = benchmark(string_operations, "hello world", 1000)
    assert len(result) == 1000
    assert result[0] == "HELLO_WORLD"

Pedantic Mode Examples

Basic Pedantic Benchmarking

def test_pedantic_basic(benchmark):
    def expensive_operation():
        return sum(x**2 for x in range(10000))
    
    result = benchmark.pedantic(
        target=expensive_operation,
        rounds=5,           # Run 5 measurement rounds
        iterations=10       # 10 iterations per round
    )
    assert result == 333283335000

Pedantic with Setup Function

def test_pedantic_with_setup(benchmark):
    def create_data():
        # Setup function returns (args, kwargs)
        data = list(range(10000))
        return (data,), {}
    
    def process_data(data):
        return sum(x**2 for x in data)
    
    result = benchmark.pedantic(
        target=process_data,
        setup=create_data,
        rounds=3,
        warmup_rounds=1
    )
    assert result == 333283335000

Pedantic with Explicit Arguments

def test_pedantic_with_args(benchmark):
    def multiply_matrices(a, b, size):
        result = [[0] * size for _ in range(size)]
        for i in range(size):
            for j in range(size):
                for k in range(size):
                    result[i][j] += a[i][k] * b[k][j]
        return result
    
    # Create test matrices
    size = 50
    matrix_a = [[1] * size for _ in range(size)]
    matrix_b = [[2] * size for _ in range(size)]
    
    result = benchmark.pedantic(
        target=multiply_matrices,
        args=(matrix_a, matrix_b, size),
        rounds=3,
        iterations=1
    )
    assert result[0][0] == 100  # 50 * 1 * 2

Calibration and Timing

The benchmark fixture automatically calibrates the number of iterations to ensure reliable measurements:

  1. Timer Precision: Computes timer precision for the platform
  2. Minimum Time: Ensures each measurement round meets minimum time thresholds
  3. Calibration: Automatically determines optimal number of iterations
  4. Warmup: Optional warmup rounds to stabilize performance
  5. Statistics: Collects timing data across multiple rounds

Calibration Process

def test_calibration_behavior(benchmark):
    def fast_function():
        return sum(range(100))
    
    # The fixture will automatically:
    # 1. Measure timer precision
    # 2. Run calibration to find optimal iterations
    # 3. Execute warmup rounds if configured
    # 4. Run the actual benchmark rounds
    # 5. Collect and analyze statistics
    result = benchmark(fast_function)
    assert result == 4950

Exception Handling

FixtureAlreadyUsed

class FixtureAlreadyUsed(Exception):
    """Raised when the benchmark fixture is used more than once in a single test."""
def test_fixture_single_use(benchmark):
    benchmark(lambda: 42)
    
    # This would raise FixtureAlreadyUsed
    # benchmark(lambda: 24)  # Error!

Error States

def test_error_handling(benchmark):
    """Exceptions raised by the timed function propagate to the test."""
    def failing_function():
        raise ValueError("Something went wrong")

    # The fixture re-raises the exception instead of swallowing it.
    with pytest.raises(ValueError):
        benchmark(failing_function)

    # The fixture records that an error occurred.
    assert benchmark.has_error

Integration with pytest

Test Parametrization

@pytest.mark.parametrize("size", [100, 1000, 10000])
def test_scaling_benchmark(benchmark, size):
    """Benchmark the same operation at several input sizes."""
    def process_list(n):
        return sum(range(n))

    result = benchmark(process_list, size)
    # Closed form for sum(range(size)) is size * (size - 1) / 2.
    expected = size * (size - 1) // 2
    assert result == expected

Test Collection and Skipping

The benchmark fixture integrates with pytest's test collection and skipping mechanisms. Tests with benchmarks are automatically identified and can be controlled via command-line options like --benchmark-skip and --benchmark-only.

Install with Tessl CLI

npx tessl i tessl/pypi-pytest-benchmark

docs

analysis-reporting.md

aspect-benchmarking.md

cli-tools.md

configuration.md

core-benchmarking.md

index.md

storage-comparison.md

tile.json