CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-atheris

A coverage-guided fuzzer for Python and Python extensions based on libFuzzer

91

1.28x
Overview
Eval results
Files

docs/advanced-features.md

Advanced Features

Advanced Atheris capabilities including hook management for specialized instrumentation, custom mutators and crossovers, regex pattern generation, and integration with external tools.

Capabilities

Hook Management

Enable specialized instrumentation for regex and string operations to improve fuzzing effectiveness.

class EnabledHooks:
    """Manages the set of enabled instrumentation hooks.

    Hooks are enabled by name through ``add`` and queried with the ``in``
    operator through ``__contains__``.
    """

    def add(self, hook: str) -> None:
        """
        Enable a specific instrumentation hook.

        Args:
            hook (str): Hook name to enable:
                       - 'RegEx': Instrument regular expression operations
                       - 'str': Instrument string method calls (startswith, endswith)
        """

    def __contains__(self, hook: str) -> bool:
        """
        Check if a hook is enabled (this is what ``hook in enabled_hooks`` calls).

        Args:
            hook (str): Hook name to check

        Returns:
            bool: True if the hook is enabled
        """

# Global hook manager instance, reached as ``atheris.enabled_hooks``
enabled_hooks: EnabledHooks

Usage Examples:

import atheris
import re

# Enable regex instrumentation before compiling patterns, so that the `re`
# calls made by the harness below are instrumented
atheris.enabled_hooks.add("RegEx")

def TestOneInput(data):
    """Decode the fuzz input and hand it to the handler for each pattern it matches."""
    decoded = data.decode('utf-8', errors='ignore')

    # Both regex calls below are the operations the 'RegEx' hook instruments
    ssn_found = re.search(r'\d{3}-\d{2}-\d{4}', decoded) is not None
    if ssn_found:
        process_ssn(decoded)

    email_found = re.match(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$', decoded) is not None
    if email_found:
        process_email(decoded)

# Enable string method instrumentation (covers the startswith/endswith calls
# in the harness below)
atheris.enabled_hooks.add("str")

def TestStringMethods(data):
    """Decode the fuzz input and dispatch on its prefix and suffix."""
    decoded = data.decode('utf-8', errors='ignore')

    # startswith/endswith are the string methods the 'str' hook instruments
    has_http_prefix = decoded.startswith('HTTP/')
    if has_http_prefix:
        parse_http_header(decoded)

    has_json_suffix = decoded.endswith('.json')
    if has_json_suffix:
        parse_json_file(decoded)

Regex Pattern Generation

Generate strings that match regex patterns for improved fuzzing coverage.

def gen_match(pattern):
    """
    Generate a string that matches a regular expression pattern.

    Useful for creating seed inputs or understanding what patterns
    a regex is designed to match.

    Args:
        pattern (str or bytes): Regular expression pattern

    Returns:
        str or bytes: One value that matches the given pattern

    Note:
        Generation is best-effort and may not handle all regex features.
        Complex patterns with lookarounds or other advanced features may
        not be fully supported.
    """

Usage Examples:

import atheris

# Generate matching strings for testing
# (the values in the trailing comments are illustrative; actual output may differ)
email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
sample_email = atheris.gen_match(email_pattern)
print(f"Generated email: {sample_email}")  # e.g., "a@a.aa"

phone_pattern = r'\(\d{3}\) \d{3}-\d{4}'
sample_phone = atheris.gen_match(phone_pattern)
print(f"Generated phone: {sample_phone}")  # e.g., "(000) 000-0000"

# Use in custom mutators
def custom_mutator(data, max_size, seed):
    """
    Mutate ``data``, occasionally replacing it with freshly generated input.

    Args:
        data (bytes): Input data to mutate
        max_size (int): Maximum size of the output in bytes
        seed (int): Seed supplied by the fuzzer; every tenth value selects
                    the generated branch

    Returns:
        bytes: Generated or mutated data, at most max_size bytes long
    """
    if seed % 10 == 0:
        # Occasionally generate valid-looking input
        generated = atheris.gen_match(r'user:\w+;pass:\w+').encode('utf-8')
        # The generated text is not size-aware, so cut it to the allowed size
        return generated[:max_size]
    return atheris.Mutate(data, max_size)

Custom Mutators

Implement domain-specific mutation strategies for more effective fuzzing.

Custom Mutator Function Signature:

def custom_mutator(data: bytes, max_size: int, seed: int) -> bytes:
    """
    Custom mutation function for domain-specific input generation.

    A function with this signature is passed to ``atheris.Setup`` through the
    ``custom_mutator`` keyword argument.

    Args:
        data (bytes): Input data to mutate (may be empty for initial generation)
        max_size (int): Maximum size of the output in bytes
        seed (int): Random seed for reproducible mutations

    Returns:
        bytes: Mutated data, length must be <= max_size
    """

Usage Examples:

import atheris
import zlib
import json
import random

def json_mutator(data, max_size, seed):
    """Custom mutator for JSON data.

    Parses ``data`` as a JSON object, applies one mutation selected from
    ``seed``, and returns the re-serialized bytes. Input that is empty,
    undecodable, malformed, or not a JSON object is replaced by a small
    starting object before mutating.

    Args:
        data (bytes): Current input; may be empty or not valid JSON
        max_size (int): Maximum size of the output in bytes
        seed (int): Seed for the mutation choices, so equal arguments select
                    the same mutation

    Returns:
        bytes: Serialized JSON cut to at most max_size bytes (the cut can
               leave incomplete JSON when max_size is very small)
    """
    # A private generator produces the same sequence random.seed(seed) would,
    # without overwriting the process-wide random state.
    rng = random.Random(seed)

    try:
        # Try to parse existing data as JSON
        obj = json.loads(data.decode('utf-8')) if data else {}
    except (ValueError, RecursionError):
        # Undecodable bytes, malformed JSON, or nesting too deep to parse:
        # create a basic structure
        obj = {"key": "value"}
    if not isinstance(obj, dict):
        # Valid JSON that is not an object (list, number, string, ...) cannot
        # take the key-based mutations below
        obj = {"key": "value"}

    # Apply JSON-specific mutations
    mutation_type = rng.randint(0, 4)

    if mutation_type == 0:
        # Add random key-value pair
        obj[f"key_{rng.randint(0, 100)}"] = rng.choice([
            rng.randint(0, 1000),
            f"value_{rng.randint(0, 100)}",
            rng.random(),
            rng.choice([True, False])
        ])
    elif mutation_type == 1:
        # Mutate existing values
        if obj:
            key = rng.choice(list(obj.keys()))
            obj[key] = "mutated_" + str(rng.randint(0, 1000))
    elif mutation_type == 2:
        # Add nested structure
        obj["nested"] = {"inner": rng.randint(0, 100)}
    else:
        # Use libFuzzer's mutation on serialized data, keeping 100 bytes of
        # headroom below max_size
        budget = max_size - 100
        if budget > 0:  # a non-positive budget is not a usable output size
            serialized = json.dumps(obj).encode('utf-8')
            mutated_serialized = atheris.Mutate(serialized, budget)
            try:
                json.loads(mutated_serialized.decode('utf-8'))
                return mutated_serialized
            except (ValueError, RecursionError):
                pass  # Fall through to normal serialization

    result = json.dumps(obj).encode('utf-8')
    return result[:max_size]

def compressed_mutator(data, max_size, seed):
    """Custom mutator for compressed data.

    Decompresses ``data``, mutates the plain payload, and compresses the
    result again. Input that is not a valid zlib stream is replaced by a
    small valid one derived from ``seed``.

    Args:
        data (bytes): Current input, expected to be zlib-compressed
        max_size (int): Maximum size of the output in bytes
        seed (int): Value embedded in the fallback payload

    Returns:
        bytes: Compressed data cut to at most max_size bytes (the cut can
               leave a truncated stream)
    """
    try:
        decompressed = zlib.decompress(data)
    except zlib.error:
        # If decompression fails, create valid compressed data
        return zlib.compress(b"Hello " + str(seed).encode())[:max_size]

    # Mutate the plain payload, then recompress. The budget is twice the
    # payload size, kept positive for a payload that decompresses to nothing.
    mutated = atheris.Mutate(decompressed, max(len(decompressed) * 2, 1))
    return zlib.compress(mutated)[:max_size]

# Use custom mutators
import sys  # sys.argv is needed below; sys is not among this example's imports

atheris.Setup(sys.argv, TestOneInput, custom_mutator=json_mutator)
atheris.Fuzz()

Custom Crossovers

Implement domain-specific crossover strategies for combining inputs.

Custom Crossover Function Signature:

def custom_crossover(data1: bytes, data2: bytes, max_out_size: int, seed: int) -> bytes:
    """
    Custom crossover function for domain-specific input combination.

    A function with this signature is passed to ``atheris.Setup`` through the
    ``custom_crossover`` keyword argument.

    Args:
        data1 (bytes): First input to combine
        data2 (bytes): Second input to combine
        max_out_size (int): Maximum size of the output in bytes
        seed (int): Random seed for reproducible crossovers

    Returns:
        bytes: Combined data, length must be <= max_out_size
    """

Usage Example:

import atheris
import json
import random

def json_crossover(data1, data2, max_out_size, seed):
    """Crossover function that combines JSON objects.

    When both inputs parse as JSON objects, builds a new object by picking,
    key by key, the value from one of the two. Otherwise falls back to
    joining the first half-budget of each raw input.

    Args:
        data1 (bytes): First input to combine
        data2 (bytes): Second input to combine
        max_out_size (int): Maximum size of the output in bytes
        seed (int): Seed for the per-key choices, so equal arguments give
                    equal output

    Returns:
        bytes: Combined data cut to at most max_out_size bytes
    """
    # A private generator produces the same sequence random.seed(seed) would,
    # without overwriting the process-wide random state.
    rng = random.Random(seed)

    try:
        obj1 = json.loads(data1.decode('utf-8')) if data1 else {}
        obj2 = json.loads(data2.decode('utf-8')) if data2 else {}
    except (ValueError, RecursionError):
        # Undecodable bytes, malformed JSON, or nesting too deep to parse
        obj1 = obj2 = None

    if not (isinstance(obj1, dict) and isinstance(obj2, dict)):
        # If parsing fails, or either side is not a JSON object, use simple
        # concatenation
        result = data1[:max_out_size//2] + data2[:max_out_size//2]
        return result[:max_out_size]

    # Combine JSON objects
    combined = {}

    # Visit keys in first-seen order (obj1's, then obj2's new ones). A set
    # would iterate strings in an order that can change between processes,
    # which would make the result differ for the same seed.
    for key in dict.fromkeys([*obj1, *obj2]):
        # Coin flip per key. Note a key found only in obj1 is dropped when
        # the flip is False, while a key found only in obj2 is always kept.
        if rng.choice([True, False]) and key in obj1:
            combined[key] = obj1[key]
        elif key in obj2:
            combined[key] = obj2[key]

    result = json.dumps(combined).encode('utf-8')
    return result[:max_out_size]

# Use with both custom mutator and crossover
import sys  # sys.argv is needed below; sys is not among this example's imports

atheris.Setup(sys.argv, TestOneInput,
              custom_mutator=json_mutator,
              custom_crossover=json_crossover)
atheris.Fuzz()

Constants and Special Values

Important constants used throughout the Atheris API.

# Passed to FuzzedDataProvider methods to consume all remaining bytes
ALL_REMAINING: int

def path() -> str:
    """
    Get the path to the Atheris installation directory.

    Returns:
        str: Path to the directory containing Atheris files
    """

The ALL_REMAINING constant is used with FuzzedDataProvider methods to consume all remaining bytes:

def TestOneInput(data):
    """Split the fuzz input into a header and a payload, then process them."""
    provider = atheris.FuzzedDataProvider(data)

    # The header request is a fixed 10 bytes taken first
    head = provider.ConsumeBytes(10)

    # ALL_REMAINING asks for everything that is left as the payload
    body = provider.ConsumeBytes(atheris.ALL_REMAINING)

    process_message(head, body)

Coverage Visualization

Atheris is compatible with Python's coverage.py for analyzing code coverage:

# Run fuzzer with coverage tracking (this example passes -atheris_runs=10000)
python3 -m coverage run fuzzer.py -atheris_runs=10000

# Generate HTML coverage report
python3 -m coverage html

# View report: serve the htmlcov directory over HTTP on port 8000
cd htmlcov && python3 -m http.server 8000

Coverage Integration Example:

import atheris
import sys

# Import the module under test inside the instrument_imports() context
with atheris.instrument_imports():
    import target_module

def TestOneInput(data):
    """Fuzz entry point: pass the raw input bytes to target_module.parse."""
    target_module.parse(data)

# Register the entry point and start fuzzing only when run as a script
if __name__ == "__main__":
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()

Native Extension Fuzzing

For fuzzing native C/C++ extensions, additional build configuration is required:

# Your extension must be built with appropriate compiler flags
# See native_extension_fuzzing.md in the Atheris documentation

def TestNativeExtension(data):
    """Fuzz entry point: pass the raw input bytes to native_module.parse_data."""
    try:
        import native_module
        native_module.parse_data(data)
    except ImportError:
        # Skip if native module not available
        # NOTE(review): the try also covers parse_data, so an ImportError
        # raised while parsing would be skipped silently as well.
        pass

# This example passes internal_libfuzzer=False explicitly
atheris.Setup(sys.argv, TestNativeExtension, internal_libfuzzer=False)
atheris.Fuzz()

Integration with OSS-Fuzz

Atheris is fully supported by OSS-Fuzz for continuous fuzzing:

#!/usr/bin/python3
# Typical OSS-Fuzz integration structure

import atheris
import sys
import os

# Add project-specific paths: put this script's directory first on sys.path
sys.path.insert(0, os.path.dirname(__file__))

# Import the project under test inside the instrument_imports() context
with atheris.instrument_imports():
    import target_project

def TestOneInput(data):
    """Fuzz entry point: run target_project.fuzz_target on the raw input."""
    try:
        target_project.fuzz_target(data)
    except target_project.ExpectedException:
        # Don't report expected exceptions as crashes
        pass

def main():
    """Register TestOneInput with Atheris and start the fuzzing loop."""
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()

if __name__ == "__main__":
    main()

Performance Optimization

Tips for optimizing fuzzer performance:

# Minimize work in TestOneInput for faster execution
def TestOneInput(data):
    """Reject inputs under 4 bytes, then route the rest by a leading type value."""
    # Cheap length check first: obviously invalid input exits immediately
    if len(data) < 4:
        return

    # Structured access to the input bytes
    provider = atheris.FuzzedDataProvider(data)
    kind = provider.ConsumeInt(1)

    # Hand the remaining input to the handler for this message type
    if kind == 1:
        handle_type1(provider)
    elif kind == 2:
        handle_type2(provider)
    # ...

# Use timeouts for operations that might hang; the timeout is given on the
# command line rather than in the Setup call
atheris.Setup(sys.argv, TestOneInput)
# Run with: python fuzzer.py -timeout=5
atheris.Fuzz()

Install with Tessl CLI

npx tessl i tessl/pypi-atheris

docs

advanced-features.md

core-fuzzing.md

data-provider.md

index.md

instrumentation.md

tile.json