0
# Advanced Features
1
2
This document covers advanced Atheris capabilities: hook management for specialized instrumentation, custom mutators and crossovers, regex pattern generation, and integration with external tools.
3
4
## Capabilities
5
6
### Hook Management
7
8
Enable specialized instrumentation for regex and string operations to improve fuzzing effectiveness.
9
10
```python { .api }
11
class EnabledHooks:
12
"""Manages the set of enabled instrumentation hooks."""
13
14
def add(self, hook: str) -> None:
15
"""
16
Enable a specific instrumentation hook.
17
18
Args:
19
hook (str): Hook name to enable:
20
- 'RegEx': Instrument regular expression operations
21
- 'str': Instrument string method calls (startswith, endswith)
22
"""
23
24
def __contains__(self, hook: str) -> bool:
25
"""
26
Check if a hook is enabled.
27
28
Args:
29
hook (str): Hook name to check
30
31
Returns:
32
bool: True if the hook is enabled
33
"""
34
35
# Global hook manager instance
36
enabled_hooks: EnabledHooks
37
```
38
39
**Usage Examples:**
40
41
```python
42
import atheris
43
import re
44
45
# Enable regex instrumentation before compiling patterns
46
atheris.enabled_hooks.add("RegEx")
47
48
def TestOneInput(data):
    """Decode the fuzz input and hand it to the SSN and email handlers.

    Args:
        data (bytes): Raw fuzzer input; undecodable bytes are dropped.
    """
    decoded = data.decode('utf-8', errors='ignore')

    # These regex operations will now be instrumented
    ssn_found = re.search(r'\d{3}-\d{2}-\d{4}', decoded)
    if ssn_found:
        process_ssn(decoded)

    email_found = re.match(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$', decoded)
    if email_found:
        process_email(decoded)
57
58
# Enable string method instrumentation
59
atheris.enabled_hooks.add("str")
60
61
def TestStringMethods(data):
    """Decode the fuzz input and route it by its prefix and suffix.

    Args:
        data (bytes): Raw fuzzer input; undecodable bytes are dropped.
    """
    decoded = data.decode('utf-8', errors='ignore')

    # These string methods will be instrumented
    looks_like_http = decoded.startswith('HTTP/')
    if looks_like_http:
        parse_http_header(decoded)

    looks_like_json_file = decoded.endswith('.json')
    if looks_like_json_file:
        parse_json_file(decoded)
70
```
71
72
### Regex Pattern Generation
73
74
Generate strings that match regex patterns for improved fuzzing coverage.
75
76
```python { .api }
77
def gen_match(pattern):
78
"""
79
Generate a string that matches a regular expression pattern.
80
81
Useful for creating seed inputs or understanding what patterns
82
a regex is designed to match.
83
84
Args:
85
pattern (str or bytes): Regular expression pattern
86
87
Returns:
88
str or bytes: A string that matches the given pattern
89
90
Note:
91
This is a best-effort generator and may not handle all regex features.
92
Complex patterns with lookarounds or advanced features may not be
93
fully supported.
94
"""
95
```
96
97
**Usage Examples:**
98
99
```python
100
import atheris
101
102
# Generate matching strings for testing
103
email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
104
sample_email = atheris.gen_match(email_pattern)
105
print(f"Generated email: {sample_email}") # e.g., "a@a.aa"
106
107
phone_pattern = r'\(\d{3}\) \d{3}-\d{4}'
108
sample_phone = atheris.gen_match(phone_pattern)
109
print(f"Generated phone: {sample_phone}") # e.g., "(000) 000-0000"
110
111
# Use in custom mutators
112
def custom_mutator(data, max_size, seed):
    """Mutate ``data``, occasionally replacing it with a pattern-matching input.

    Args:
        data (bytes): Input to mutate.
        max_size (int): Maximum length of the returned bytes.
        seed (int): Seed supplied by the fuzzer; every tenth value triggers
            generation of a fresh, valid-looking input.

    Returns:
        bytes: The new input, never longer than ``max_size``.
    """
    if seed % 10 == 0:
        # Occasionally generate valid-looking input
        generated = atheris.gen_match(r'user:\w+;pass:\w+').encode('utf-8')
        # The generated string is not bounded by max_size, so enforce the
        # mutator contract explicitly.
        return generated[:max_size]
    return atheris.Mutate(data, max_size)
118
```
119
120
### Custom Mutators
121
122
Implement domain-specific mutation strategies for more effective fuzzing.
123
124
**Custom Mutator Function Signature:**
125
126
```python { .api }
127
def custom_mutator(data: bytes, max_size: int, seed: int) -> bytes:
128
"""
129
Custom mutation function for domain-specific input generation.
130
131
Args:
132
data (bytes): Input data to mutate (may be empty for initial generation)
133
max_size (int): Maximum size of the output in bytes
134
seed (int): Random seed for reproducible mutations
135
136
Returns:
137
bytes: Mutated data, length must be <= max_size
138
"""
139
```
140
141
**Usage Examples:**
142
143
```python
144
import atheris
145
import zlib
146
import json
147
import random
148
149
def json_mutator(data, max_size, seed):
    """Custom mutator for JSON data.

    Parses ``data`` as a JSON object, applies one mutation selected from
    ``seed``, and returns the re-serialized bytes.

    Args:
        data (bytes): Current input; may be empty or not valid JSON.
        max_size (int): Maximum length of the returned bytes.
        seed (int): Seed that makes the chosen mutation reproducible.

    Returns:
        bytes: Mutated input, never longer than ``max_size``. Truncating to
        fit ``max_size`` may leave syntactically incomplete JSON.
    """
    # A private generator keeps the mutation reproducible from ``seed``
    # without reseeding the process-wide ``random`` state.
    rng = random.Random(seed)

    try:
        # Try to parse existing data as JSON
        obj = json.loads(data.decode('utf-8')) if data else {}
    except (ValueError, RecursionError):
        # ValueError covers UnicodeDecodeError and json.JSONDecodeError;
        # RecursionError covers pathologically nested documents.
        obj = None
    if not isinstance(obj, dict):
        # Parsing failed, or the document is valid JSON but not an object
        # (list, number, string, ...). The mutations below need a dict, so
        # create a basic structure.
        obj = {"key": "value"}

    # Apply JSON-specific mutations
    mutation_type = rng.randint(0, 4)

    if mutation_type == 0:
        # Add random key-value pair
        new_value = rng.choice([
            rng.randint(0, 1000),
            f"value_{rng.randint(0, 100)}",
            rng.random(),
            rng.choice([True, False]),
        ])
        obj[f"key_{rng.randint(0, 100)}"] = new_value
    elif mutation_type == 1:
        # Mutate existing values
        if obj:
            key = rng.choice(list(obj))
            obj[key] = "mutated_" + str(rng.randint(0, 1000))
    elif mutation_type == 2:
        # Add nested structure
        obj["nested"] = {"inner": rng.randint(0, 100)}
    elif max_size > 0:
        # Use libFuzzer's mutation on serialized data. The buffer is clipped
        # first so it never exceeds the size limit passed to Mutate.
        serialized = json.dumps(obj).encode('utf-8')[:max_size]
        mutated_serialized = atheris.Mutate(serialized, max_size)
        try:
            json.loads(mutated_serialized.decode('utf-8'))
        except (ValueError, RecursionError):
            pass  # Not valid JSON any more: fall through to normal serialization
        else:
            return mutated_serialized

    return json.dumps(obj).encode('utf-8')[:max_size]
194
195
def compressed_mutator(data, max_size, seed):
    """Custom mutator for compressed data.

    Decompresses ``data``, mutates the payload with libFuzzer, and compresses
    the result again. If ``data`` is not a valid zlib stream, or the mutated
    stream does not fit in ``max_size``, a small valid stream derived from
    ``seed`` is returned instead.

    Args:
        data (bytes): Current input, expected to be a zlib stream.
        max_size (int): Maximum length of the returned bytes.
        seed (int): Seed supplied by the fuzzer; used for the fallback payload.

    Returns:
        bytes: The new input, never longer than ``max_size``.
    """
    try:
        decompressed = zlib.decompress(data)
    except zlib.error:
        decompressed = None

    if decompressed is not None:
        # Decompress, mutate, recompress. Allow the payload to double in
        # size, but never pass a zero size limit for an empty payload.
        mutated = atheris.Mutate(decompressed, max(len(decompressed) * 2, 1))
        recompressed = zlib.compress(mutated)
        if len(recompressed) <= max_size:
            return recompressed
        # A truncated stream could never be decompressed again, so fall
        # through to the fallback rather than slicing it.

    # If decompression fails, create valid compressed data
    return zlib.compress(b"Hello " + str(seed).encode())[:max_size]
205
206
# Use custom mutators
import sys  # provides sys.argv; not in this example's import list

atheris.Setup(sys.argv, TestOneInput, custom_mutator=json_mutator)
atheris.Fuzz()
209
```
210
211
### Custom Crossovers
212
213
Implement domain-specific crossover strategies for combining inputs.
214
215
**Custom Crossover Function Signature:**
216
217
```python { .api }
218
def custom_crossover(data1: bytes, data2: bytes, max_out_size: int, seed: int) -> bytes:
219
"""
220
Custom crossover function for domain-specific input combination.
221
222
Args:
223
data1 (bytes): First input to combine
224
data2 (bytes): Second input to combine
225
max_out_size (int): Maximum size of the output in bytes
226
seed (int): Random seed for reproducible crossovers
227
228
Returns:
229
bytes: Combined data, length must be <= max_out_size
230
"""
231
```
232
233
**Usage Example:**
234
235
```python
236
import atheris
237
import json
238
import random
239
240
def _load_json_object(raw):
    """Return ``raw`` parsed as a JSON object, ``{}`` if empty, else ``None``."""
    if not raw:
        return {}
    try:
        parsed = json.loads(raw.decode('utf-8'))
    except (ValueError, RecursionError):
        # ValueError covers UnicodeDecodeError and json.JSONDecodeError.
        return None
    # Lists, numbers and strings are valid JSON but cannot be merged by key.
    return parsed if isinstance(parsed, dict) else None


def json_crossover(data1, data2, max_out_size, seed):
    """Crossover function that combines JSON objects.

    Args:
        data1 (bytes): First input to combine.
        data2 (bytes): Second input to combine.
        max_out_size (int): Maximum length of the returned bytes.
        seed (int): Seed that makes the key selection reproducible.

    Returns:
        bytes: A JSON object built from keys of both inputs, or the
        concatenated halves of the raw inputs when either one is not a JSON
        object. Never longer than ``max_out_size``.
    """
    # A private generator keeps the result reproducible from ``seed``
    # without reseeding the process-wide ``random`` state.
    rng = random.Random(seed)

    obj1 = _load_json_object(data1)
    obj2 = _load_json_object(data2)
    if obj1 is None or obj2 is None:
        # If parsing fails, use simple concatenation
        half = max_out_size // 2
        return (data1[:half] + data2[:half])[:max_out_size]

    # Randomly take keys from both objects. Keys are visited in sorted order
    # because set iteration order for strings changes between processes,
    # which would make the output for a given seed unreproducible.
    combined = {}
    for key in sorted(obj1.keys() | obj2.keys()):
        if rng.choice([True, False]) and key in obj1:
            combined[key] = obj1[key]
        elif key in obj2:
            combined[key] = obj2[key]

    return json.dumps(combined).encode('utf-8')[:max_out_size]
265
266
# Use with both custom mutator and crossover
import sys  # provides sys.argv; not in this example's import list

atheris.Setup(
    sys.argv,
    TestOneInput,
    custom_mutator=json_mutator,
    custom_crossover=json_crossover,
)
atheris.Fuzz()
271
```
272
273
### Constants and Special Values
274
275
Important constants used throughout the Atheris API.
276
277
```python { .api }
278
ALL_REMAINING: int
279
280
def path() -> str:
281
"""
282
Get the path to the Atheris installation directory.
283
284
Returns:
285
str: Path to the directory containing Atheris files
286
"""
287
```
288
289
The `ALL_REMAINING` constant is used with FuzzedDataProvider methods to consume all remaining bytes:
290
291
```python
292
def TestOneInput(data):
293
fdp = atheris.FuzzedDataProvider(data)
294
295
# Extract fixed-size header
296
header = fdp.ConsumeBytes(10)
297
298
# Use all remaining data as payload
299
payload = fdp.ConsumeBytes(atheris.ALL_REMAINING)
300
301
process_message(header, payload)
302
```
303
304
### Coverage Visualization
305
306
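Atheris is compatible with the `coverage.py` package for analyzing code coverage. Pass `-atheris_runs=N` so the fuzzer exits gracefully after N executions; coverage data is only written on a graceful exit: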
307
308
```bash
309
# Run fuzzer with coverage tracking
310
python3 -m coverage run fuzzer.py -atheris_runs=10000
311
312
# Generate HTML coverage report
313
python3 -m coverage html
314
315
# View report
316
cd htmlcov && python3 -m http.server 8000
317
```
318
319
**Coverage Integration Example:**
320
321
```python
322
import atheris
323
import sys
324
325
with atheris.instrument_imports():
326
import target_module
327
328
def TestOneInput(data):
329
target_module.parse(data)
330
331
if __name__ == "__main__":
332
atheris.Setup(sys.argv, TestOneInput)
333
atheris.Fuzz()
334
```
335
336
### Native Extension Fuzzing
337
338
For fuzzing native C/C++ extensions, additional build configuration is required:
339
340
```python
341
# Your extension must be built with appropriate compiler flags
# See native_extension_fuzzing.md in the Atheris documentation
import sys

import atheris


def TestNativeExtension(data):
    """Feed the fuzz input to the native extension's parser.

    Args:
        data (bytes): Raw fuzzer input.
    """
    # Only the import is guarded: an ImportError raised from inside
    # parse_data is a finding in the target and must not be swallowed.
    try:
        import native_module
    except ImportError:
        # Skip if native module not available
        return
    native_module.parse_data(data)


atheris.Setup(sys.argv, TestNativeExtension, internal_libfuzzer=False)
atheris.Fuzz()
354
```
355
356
### Integration with OSS-Fuzz
357
358
Atheris is fully supported by OSS-Fuzz for continuous fuzzing:
359
360
```python
361
#!/usr/bin/python3
362
# Typical OSS-Fuzz integration structure
363
364
import atheris
365
import sys
366
import os
367
368
# Add project-specific paths
369
sys.path.insert(0, os.path.dirname(__file__))
370
371
with atheris.instrument_imports():
372
import target_project
373
374
def TestOneInput(data):
375
try:
376
target_project.fuzz_target(data)
377
except target_project.ExpectedException:
378
# Don't report expected exceptions as crashes
379
pass
380
381
def main():
382
atheris.Setup(sys.argv, TestOneInput)
383
atheris.Fuzz()
384
385
if __name__ == "__main__":
386
main()
387
```
388
389
### Performance Optimization
390
391
Tips for optimizing fuzzer performance:
392
393
```python
394
# Minimize work in TestOneInput for faster execution
395
def TestOneInput(data):
396
# Early exit for obviously invalid input
397
if len(data) < 4:
398
return
399
400
# Use structured input when possible
401
fdp = atheris.FuzzedDataProvider(data)
402
message_type = fdp.ConsumeInt(1)
403
404
# Route to specific handlers
405
if message_type == 1:
406
handle_type1(fdp)
407
elif message_type == 2:
408
handle_type2(fdp)
409
# ...
410
411
# Use timeouts for operations that might hang
412
atheris.Setup(sys.argv, TestOneInput)
413
# Run with: python fuzzer.py -timeout=5
414
atheris.Fuzz()
415
```