A coverage-guided fuzzer for Python and Python extensions based on libFuzzer
91
Advanced Atheris capabilities including hook management for specialized instrumentation, custom mutators and crossovers, regex pattern generation, and integration with external tools.
Enable specialized instrumentation for regex and string operations to improve fuzzing effectiveness.
class EnabledHooks:
    """Manages the set of enabled instrumentation hooks.

    This listing is an API stub: only the signatures and their contracts
    are shown, not the method bodies. The hook names documented here are
    'RegEx' and 'str'.
    """

    def add(self, hook: str) -> None:
        """Enable a specific instrumentation hook.

        Args:
            hook (str): Hook name to enable:
                - 'RegEx': Instrument regular expression operations
                - 'str': Instrument string method calls (startswith, endswith)
        """

    def __contains__(self, hook: str) -> bool:
        """Check if a hook is enabled.

        Backs the ``hook in hooks`` membership syntax.

        Args:
            hook (str): Hook name to check

        Returns:
            bool: True if the hook is enabled
        """
# Global hook manager instance
enabled_hooks: EnabledHooks

Usage Examples:
import atheris
import re
# Enable regex instrumentation before compiling patterns
atheris.enabled_hooks.add("RegEx")
def TestOneInput(data):
    """Fuzz entry point: decode the input and route it by regex match.

    Bytes that are not valid UTF-8 are dropped during decoding. Text that
    contains an SSN-shaped substring is passed to process_ssn, and text that
    matches the e-mail pattern is passed to process_email. Both checks always
    run, in that order.
    """
    decoded = data.decode('utf-8', errors='ignore')

    # These regex operations will now be instrumented
    ssn_hit = re.search(r'\d{3}-\d{2}-\d{4}', decoded)
    if ssn_hit:
        process_ssn(decoded)

    email_hit = re.match(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$', decoded)
    if email_hit:
        process_email(decoded)
# Enable string method instrumentation
atheris.enabled_hooks.add("str")
def TestStringMethods(data):
text = data.decode('utf-8', errors='ignore')
# These string methods will be instrumented
if text.startswith('HTTP/'):
parse_http_header(text)
if text.endswith('.json'):
parse_json_file(text)

Generate strings that match regex patterns for improved fuzzing coverage.
def gen_match(pattern):
"""
Generate a string that matches a regular expression pattern.
Useful for creating seed inputs or understanding what patterns
a regex is designed to match.
Args:
pattern (str or bytes): Regular expression pattern
Returns:
str or bytes: A string that matches the given pattern
Note:
This is a best-effort generator and may not handle all regex features.
Complex patterns with lookarounds or advanced features may not be
fully supported.
"""

Usage Examples:
import atheris
# Generate matching strings for testing
email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
sample_email = atheris.gen_match(email_pattern)
print(f"Generated email: {sample_email}") # e.g., "a@a.aa"
phone_pattern = r'\(\d{3}\) \d{3}-\d{4}'
sample_phone = atheris.gen_match(phone_pattern)
print(f"Generated phone: {sample_phone}") # e.g., "(000) 000-0000"
# Use in custom mutators
def custom_mutator(data, max_size, seed):
if seed % 10 == 0:
# Occasionally generate valid-looking input
return atheris.gen_match(r'user:\w+;pass:\w+').encode('utf-8')
else:
return atheris.Mutate(data, max_size)

Implement domain-specific mutation strategies for more effective fuzzing.
Custom Mutator Function Signature:
def custom_mutator(data: bytes, max_size: int, seed: int) -> bytes:
"""
Custom mutation function for domain-specific input generation.
Args:
data (bytes): Input data to mutate (may be empty for initial generation)
max_size (int): Maximum size of the output in bytes
seed (int): Random seed for reproducible mutations
Returns:
bytes: Mutated data, length must be <= max_size
"""

Usage Examples:
import atheris
import zlib
import json
import random
def json_mutator(data, max_size, seed):
    """Custom mutator for JSON data.

    Parses ``data`` as a JSON object, applies one randomly chosen mutation
    and returns the re-serialized result. Input that is empty, undecodable,
    not valid JSON, or valid JSON that is not an object is replaced by a
    small starter object before mutating.

    Args:
        data (bytes): Current input; may be empty or malformed.
        max_size (int): Maximum size of the output in bytes.
        seed (int): Seed for every random choice made here.

    Returns:
        bytes: Mutated data, never longer than ``max_size``. If the
        serialized object is longer than ``max_size`` it is cut to fit,
        which may leave syntactically invalid JSON.
    """
    # A private generator keeps the mutation reproducible for a given seed
    # without reseeding the process-wide ``random`` state.
    rng = random.Random(seed)

    try:
        # Try to parse existing data as JSON
        obj = json.loads(data.decode('utf-8')) if data else {}
    except (ValueError, RecursionError):
        # ValueError covers UnicodeDecodeError and json.JSONDecodeError;
        # RecursionError covers pathologically nested input.
        # If parsing fails, create a basic structure
        obj = {"key": "value"}

    if not isinstance(obj, dict):
        # A JSON list, number, string, etc. cannot take the key-based
        # mutations below, so start over from the basic structure.
        obj = {"key": "value"}

    # Apply JSON-specific mutations
    mutation_type = rng.randint(0, 4)

    if mutation_type == 0:
        # Add random key-value pair
        obj[f"key_{rng.randint(0, 100)}"] = rng.choice([
            rng.randint(0, 1000),
            f"value_{rng.randint(0, 100)}",
            rng.random(),
            rng.choice([True, False]),
        ])
    elif mutation_type == 1:
        # Mutate existing values
        if obj:
            key = rng.choice(list(obj))
            obj[key] = "mutated_" + str(rng.randint(0, 1000))
    elif mutation_type == 2:
        # Add nested structure
        obj["nested"] = {"inner": rng.randint(0, 100)}
    else:
        # Use libFuzzer's mutation on serialized data
        serialized = json.dumps(obj).encode('utf-8')
        # Leave 100 bytes of headroom when the budget allows it, but never
        # pass a zero or negative size limit to the byte-level mutator.
        budget = max_size - 100 if max_size > 100 else max(max_size, 1)
        mutated_serialized = atheris.Mutate(serialized, budget)
        try:
            json.loads(mutated_serialized.decode('utf-8'))
        except (ValueError, RecursionError):
            pass  # Fall through to normal serialization
        else:
            return mutated_serialized

    result = json.dumps(obj).encode('utf-8')
    return result[:max_size]
def compressed_mutator(data, max_size, seed):
    """Custom mutator for compressed data.

    Decompresses ``data`` with zlib, mutates the plain bytes and compresses
    the result again. When ``data`` is not a valid zlib stream, a small valid
    stream derived from ``seed`` is produced instead.

    Args:
        data (bytes): Current input, expected to be zlib-compressed.
        max_size (int): Maximum size of the output in bytes.
        seed (int): Used to build the fallback payload.

    Returns:
        bytes: Compressed data cut to at most ``max_size`` bytes. A cut
        stream is no longer a complete zlib stream.
    """
    try:
        decompressed = zlib.decompress(data)
    except zlib.error:
        # If decompression fails, create valid compressed data
        return zlib.compress(b"Hello " + str(seed).encode())[:max_size]

    # Only a decompression failure triggers the fallback above; errors raised
    # while mutating or recompressing are real problems and must surface.
    # The size limit is kept positive even for an empty payload.
    mutated = atheris.Mutate(decompressed, max(len(decompressed) * 2, 1))
    return zlib.compress(mutated)[:max_size]
# Use custom mutators
atheris.Setup(sys.argv, TestOneInput, custom_mutator=json_mutator)
atheris.Fuzz()

Implement domain-specific crossover strategies for combining inputs.
Custom Crossover Function Signature:
def custom_crossover(data1: bytes, data2: bytes, max_out_size: int, seed: int) -> bytes:
"""
Custom crossover function for domain-specific input combination.
Args:
data1 (bytes): First input to combine
data2 (bytes): Second input to combine
max_out_size (int): Maximum size of the output in bytes
seed (int): Random seed for reproducible crossovers
Returns:
bytes: Combined data, length must be <= max_out_size
"""

Usage Example:
import atheris
import json
import random
def json_crossover(data1, data2, max_out_size, seed):
    """Crossover function that combines JSON objects.

    When both inputs parse as JSON objects, each key found in either object
    is taken from one of them by a seeded coin flip. Otherwise the raw
    inputs are spliced together byte-wise.

    Args:
        data1 (bytes): First input to combine.
        data2 (bytes): Second input to combine.
        max_out_size (int): Maximum size of the output in bytes.
        seed (int): Seed for the per-key choice.

    Returns:
        bytes: Combined data, never longer than ``max_out_size``.
    """
    # A private generator avoids reseeding the process-wide ``random`` state.
    rng = random.Random(seed)

    try:
        obj1 = json.loads(data1.decode('utf-8')) if data1 else {}
        obj2 = json.loads(data2.decode('utf-8')) if data2 else {}
    except (ValueError, RecursionError):
        # ValueError covers UnicodeDecodeError and json.JSONDecodeError.
        obj1 = obj2 = None

    if not (isinstance(obj1, dict) and isinstance(obj2, dict)):
        # If parsing fails, or either input is JSON but not an object,
        # use simple concatenation
        half = max_out_size // 2
        result = data1[:half] + data2[:half]
        return result[:max_out_size]

    # Combine JSON objects
    combined = {}

    # Randomly take keys from both objects. The keys are visited in sorted
    # order so the same seed always yields the same output; set iteration
    # order for strings varies between interpreter runs.
    for key in sorted(obj1.keys() | obj2.keys()):
        if rng.choice([True, False]) and key in obj1:
            combined[key] = obj1[key]
        elif key in obj2:
            combined[key] = obj2[key]

    result = json.dumps(combined).encode('utf-8')
    return result[:max_out_size]
# Use with both custom mutator and crossover
atheris.Setup(sys.argv, TestOneInput,
custom_mutator=json_mutator,
custom_crossover=json_crossover)
atheris.Fuzz()

Important constants used throughout the Atheris API.
ALL_REMAINING: int
def path() -> str:
"""
Get the path to the Atheris installation directory.
Returns:
str: Path to the directory containing Atheris files
"""

The ALL_REMAINING constant is used with FuzzedDataProvider methods to consume all remaining bytes:
def TestOneInput(data):
fdp = atheris.FuzzedDataProvider(data)
# Extract fixed-size header
header = fdp.ConsumeBytes(10)
# Use all remaining data as payload
payload = fdp.ConsumeBytes(atheris.ALL_REMAINING)
process_message(header, payload)

Atheris is compatible with Python's coverage.py for analyzing code coverage:
# Run fuzzer with coverage tracking
python3 -m coverage run fuzzer.py -atheris_runs=10000
# Generate HTML coverage report
python3 -m coverage html
# View report
cd htmlcov && python3 -m http.server 8000

Coverage Integration Example:
import atheris
import sys
with atheris.instrument_imports():
import target_module
def TestOneInput(data):
    """Fuzz entry point: pass the raw input straight to target_module.parse."""
    target_module.parse(data)
if __name__ == "__main__":
atheris.Setup(sys.argv, TestOneInput)
atheris.Fuzz()

For fuzzing native C/C++ extensions, additional build configuration is required:
# Your extension must be built with appropriate compiler flags
# See native_extension_fuzzing.md in the Atheris documentation
def TestNativeExtension(data):
    """Fuzz entry point for a native extension module.

    Imports ``native_module`` on demand and feeds ``data`` to its
    ``parse_data`` function. If the module cannot be imported the input is
    skipped.

    Args:
        data (bytes): Fuzzer-provided input.
    """
    # Only the import is guarded, so an ImportError raised from inside
    # parse_data is reported instead of being mistaken for a missing module.
    try:
        import native_module
    except ImportError:
        # Skip if native module not available
        return
    native_module.parse_data(data)
atheris.Setup(sys.argv, TestNativeExtension, internal_libfuzzer=False)
atheris.Fuzz()

Atheris is fully supported by OSS-Fuzz for continuous fuzzing:
#!/usr/bin/python3
# Typical OSS-Fuzz integration structure
import atheris
import sys
import os
# Add project-specific paths
sys.path.insert(0, os.path.dirname(__file__))
with atheris.instrument_imports():
import target_project
def TestOneInput(data):
    """Fuzz entry point: run target_project.fuzz_target on the input.

    target_project.ExpectedException is swallowed so it is not counted as a
    finding; any other exception propagates to the caller.
    """
    try:
        target_project.fuzz_target(data)
    except target_project.ExpectedException:
        # Don't report expected exceptions as crashes
        pass
def main():
    """Register TestOneInput with Atheris using sys.argv, then start fuzzing."""
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()
if __name__ == "__main__":
main()

Tips for optimizing fuzzer performance:
# Minimize work in TestOneInput for faster execution
def TestOneInput(data):
    """Fuzz entry point that dispatches on a leading message-type value.

    Inputs shorter than four bytes are ignored. Otherwise a
    FuzzedDataProvider is built over the input, an integer is consumed with
    ConsumeInt(1), and the provider is handed to the handler selected by
    that integer. Values other than 1 and 2 are not handled here.
    """
    # Early exit for obviously invalid input
    if len(data) < 4:
        return

    # Use structured input when possible
    provider = atheris.FuzzedDataProvider(data)
    kind = provider.ConsumeInt(1)

    # Route to specific handlers
    if kind == 1:
        handle_type1(provider)
    elif kind == 2:
        handle_type2(provider)
    # ...
# Use timeouts for operations that might hang
atheris.Setup(sys.argv, TestOneInput)
# Run with: python fuzzer.py -timeout=5
atheris.Fuzz()

Install with Tessl CLI
npx tessl i tessl/pypi-atheris

evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10