or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced-features.md · core-fuzzing.md · data-provider.md · index.md · instrumentation.md

docs/advanced-features.md

0

# Advanced Features

1

2

Advanced Atheris capabilities including hook management for specialized instrumentation, custom mutators and crossovers, regex pattern generation, and integration with external tools.

3

4

## Capabilities

5

6

### Hook Management

7

8

Enable specialized instrumentation for regex and string operations to improve fuzzing effectiveness.

9

10

```python { .api }

11

class EnabledHooks:
    """Manages the set of enabled instrumentation hooks.

    This is an API signature stub: the methods carry documentation only
    and have no implementation here.
    """

    def add(self, hook: str) -> None:
        """
        Enable a specific instrumentation hook.

        Args:
            hook (str): Hook name to enable:
                - 'RegEx': Instrument regular expression operations
                - 'str': Instrument string method calls (startswith, endswith)
        """

    def __contains__(self, hook: str) -> bool:
        """
        Check if a hook is enabled.

        Args:
            hook (str): Hook name to check

        Returns:
            bool: True if the hook is enabled
        """

34

35

# Global hook manager instance

36

enabled_hooks: EnabledHooks

37

```

38

39

**Usage Examples:**

40

41

```python

42

import atheris

43

import re

44

45

# Enable regex instrumentation before compiling patterns

46

atheris.enabled_hooks.add("RegEx")

47

48

def TestOneInput(data):
    """Fuzz entry point exercising two regex checks on the decoded input.

    Args:
        data (bytes): Raw fuzzer input; undecodable bytes are dropped.

    The handlers ``process_ssn`` and ``process_email`` are not defined in
    this example; they stand in for the code under test.
    """
    text = data.decode('utf-8', errors='ignore')

    # These regex operations will now be instrumented
    if re.search(r'\d{3}-\d{2}-\d{4}', text):
        process_ssn(text)

    if re.match(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$', text):
        process_email(text)

57

58

# Enable string method instrumentation

59

atheris.enabled_hooks.add("str")

60

61

def TestStringMethods(data):
    """Fuzz entry point exercising prefix/suffix checks on the decoded input.

    Args:
        data (bytes): Raw fuzzer input; undecodable bytes are dropped.

    The handlers ``parse_http_header`` and ``parse_json_file`` are not
    defined in this example; they stand in for the code under test.
    """
    text = data.decode('utf-8', errors='ignore')

    # These string methods will be instrumented
    if text.startswith('HTTP/'):
        parse_http_header(text)

    if text.endswith('.json'):
        parse_json_file(text)

70

```

71

72

### Regex Pattern Generation

73

74

Generate strings that match regex patterns for improved fuzzing coverage.

75

76

```python { .api }

77

def gen_match(pattern):
    """
    Generate a string that matches a regular expression pattern.

    Useful for creating seed inputs or understanding what patterns
    a regex is designed to match.

    Args:
        pattern (str or bytes): Regular expression pattern

    Returns:
        str or bytes: A string that matches the given pattern

    Note:
        This is a best-effort generator and may not handle all regex
        features. Complex patterns with lookarounds or advanced features
        may not be fully supported.
    """

95

```

96

97

**Usage Examples:**

98

99

```python

100

import atheris

101

102

# Generate matching strings for testing

103

email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'

104

sample_email = atheris.gen_match(email_pattern)

105

print(f"Generated email: {sample_email}") # e.g., "a@a.aa"

106

107

phone_pattern = r'\(\d{3}\) \d{3}-\d{4}'

108

sample_phone = atheris.gen_match(phone_pattern)

109

print(f"Generated phone: {sample_phone}") # e.g., "(000) 000-0000"

110

111

# Use in custom mutators

112

def custom_mutator(data, max_size, seed):
    """Mix pattern-generated inputs with ordinary mutations.

    Args:
        data (bytes): Input to mutate.
        max_size (int): Upper bound on the length of the returned bytes.
        seed (int): Value supplied by the fuzzer; every tenth seed takes
            the pattern-generation path.

    Returns:
        bytes: Either an encoded string generated from the credential
        pattern, or the result of ``atheris.Mutate``.
    """
    if seed % 10 == 0:
        # Occasionally generate valid-looking input
        generated = atheris.gen_match(r'user:\w+;pass:\w+').encode('utf-8')
        # The generated match has no length limit of its own, so bound it
        # here to honour the max_size contract of a custom mutator.
        return generated[:max_size]
    return atheris.Mutate(data, max_size)

118

```

119

120

### Custom Mutators

121

122

Implement domain-specific mutation strategies for more effective fuzzing.

123

124

**Custom Mutator Function Signature:**

125

126

```python { .api }

127

def custom_mutator(data: bytes, max_size: int, seed: int) -> bytes:
    """
    Custom mutation function for domain-specific input generation.

    This is a signature stub that documents the expected shape of a custom
    mutator; it has no implementation.

    Args:
        data (bytes): Input data to mutate (may be empty for initial generation)
        max_size (int): Maximum size of the output in bytes
        seed (int): Random seed for reproducible mutations

    Returns:
        bytes: Mutated data, length must be <= max_size
    """

139

```

140

141

**Usage Examples:**

142

143

```python

144

import atheris

145

import zlib

146

import json

147

import random

148

149

def json_mutator(data, max_size, seed):
    """Custom mutator for JSON data.

    Parses the input as a JSON object, applies one randomly chosen
    structural mutation, and serializes the result.

    Args:
        data (bytes): Input to mutate; may be empty or not valid JSON.
        max_size (int): Maximum size of the output in bytes.
        seed (int): Seed that selects the mutation; the same seed and input
            select the same structural mutation.

    Returns:
        bytes: Mutated data of at most ``max_size`` bytes. When the
        serialized object is longer than ``max_size`` it is truncated, so
        the output is then no longer well-formed JSON.
    """
    # A private generator gives per-seed reproducibility without resetting
    # the process-wide state of the random module.
    rng = random.Random(seed)

    try:
        # Try to parse existing data as JSON
        obj = json.loads(data.decode('utf-8')) if data else {}
    except (ValueError, RecursionError):
        # ValueError covers both undecodable bytes and malformed JSON;
        # RecursionError covers pathologically nested input.
        # If parsing fails, create a basic structure
        obj = {"key": "value"}

    if not isinstance(obj, dict):
        # Valid JSON need not be an object (e.g. a list or a number); the
        # key-based mutations below require a dict, so wrap the value.
        obj = {"key": obj}

    # Apply JSON-specific mutations
    mutation_type = rng.randint(0, 4)

    if mutation_type == 0:
        # Add random key-value pair
        obj[f"key_{rng.randint(0, 100)}"] = rng.choice([
            rng.randint(0, 1000),
            f"value_{rng.randint(0, 100)}",
            rng.random(),
            rng.choice([True, False]),
        ])
    elif mutation_type == 1:
        # Mutate existing values
        if obj:
            key = rng.choice(list(obj))
            obj[key] = "mutated_" + str(rng.randint(0, 1000))
    elif mutation_type == 2:
        # Add nested structure
        obj["nested"] = {"inner": rng.randint(0, 100)}
    else:
        # Use libFuzzer's mutation on serialized data
        serialized = json.dumps(obj).encode('utf-8')
        budget = max_size - 100
        # For small max_size the budget is zero or negative; skip the
        # byte-level mutation then (presumably the underlying mutator
        # needs a positive limit -- confirm against libFuzzer).
        if budget > 0:
            mutated_serialized = atheris.Mutate(serialized, budget)
            try:
                json.loads(mutated_serialized.decode('utf-8'))
            except (ValueError, RecursionError):
                pass  # Fall through to normal serialization
            else:
                return mutated_serialized

    result = json.dumps(obj).encode('utf-8')
    return result[:max_size]

194

195

def compressed_mutator(data, max_size, seed):
    """Custom mutator for compressed data.

    Decompresses the input, mutates the payload, and recompresses it.

    Args:
        data (bytes): zlib-compressed input; may be empty or corrupt.
        max_size (int): Maximum size of the output in bytes.
        seed (int): Value supplied by the fuzzer; embedded in the fallback
            payload.

    Returns:
        bytes: Compressed data of at most ``max_size`` bytes. If the
        compressed stream is longer than ``max_size`` it is truncated and
        is then no longer a complete zlib stream.
    """
    try:
        decompressed = zlib.decompress(data)
    except zlib.error:
        # If decompression fails, create valid compressed data
        return zlib.compress(b"Hello " + str(seed).encode())[:max_size]

    # Decompress, mutate, recompress. Allow the payload to double in size,
    # but never pass a zero limit for an empty payload (presumably the
    # underlying mutator needs a positive limit -- confirm against libFuzzer).
    mutated = atheris.Mutate(decompressed, max(len(decompressed) * 2, 1))
    return zlib.compress(mutated)[:max_size]

205

206

# Use custom mutators
import sys  # sys.argv is used below, but the imports at the top of this example omit sys

atheris.Setup(sys.argv, TestOneInput, custom_mutator=json_mutator)
atheris.Fuzz()

209

```

210

211

### Custom Crossovers

212

213

Implement domain-specific crossover strategies for combining inputs.

214

215

**Custom Crossover Function Signature:**

216

217

```python { .api }

218

def custom_crossover(data1: bytes, data2: bytes, max_out_size: int, seed: int) -> bytes:
    """
    Custom crossover function for domain-specific input combination.

    This is a signature stub that documents the expected shape of a custom
    crossover; it has no implementation.

    Args:
        data1 (bytes): First input to combine
        data2 (bytes): Second input to combine
        max_out_size (int): Maximum size of the output in bytes
        seed (int): Random seed for reproducible crossovers

    Returns:
        bytes: Combined data, length must be <= max_out_size
    """

231

```

232

233

**Usage Example:**

234

235

```python

236

import atheris

237

import json

238

import random

239

240

def json_crossover(data1, data2, max_out_size, seed):
    """Crossover function that combines JSON objects.

    Args:
        data1 (bytes): First input to combine.
        data2 (bytes): Second input to combine.
        max_out_size (int): Maximum size of the output in bytes.
        seed (int): Seed that decides which parent each key is taken from;
            the same seed and inputs always give the same output.

    Returns:
        bytes: Combined data of at most ``max_out_size`` bytes. When both
        inputs are JSON objects the result is a serialized object built
        from their keys (truncated if too long). Otherwise it is the first
        half-budget of each input concatenated.
    """
    # A private generator gives per-seed reproducibility without resetting
    # the process-wide state of the random module.
    rng = random.Random(seed)

    try:
        obj1 = json.loads(data1.decode('utf-8')) if data1 else {}
        obj2 = json.loads(data2.decode('utf-8')) if data2 else {}
    except (ValueError, RecursionError):
        # ValueError covers both undecodable bytes and malformed JSON;
        # RecursionError covers pathologically nested input.
        obj1 = obj2 = None

    if not (isinstance(obj1, dict) and isinstance(obj2, dict)):
        # If parsing fails, or either input is valid JSON but not an object
        # (a list, number, ...), use simple concatenation.
        half = max_out_size // 2
        result = data1[:half] + data2[:half]
        return result[:max_out_size]

    # Combine JSON objects, randomly taking keys from both.
    combined = {}
    # Visit keys in sorted order: plain set iteration order for str keys
    # can differ between processes, which would make the result depend on
    # more than the seed.
    for key in sorted(obj1.keys() | obj2.keys()):
        if rng.choice([True, False]) and key in obj1:
            combined[key] = obj1[key]
        elif key in obj2:
            combined[key] = obj2[key]

    result = json.dumps(combined).encode('utf-8')
    return result[:max_out_size]

265

266

# Use with both custom mutator and crossover
import sys  # sys.argv is used below, but the imports at the top of this example omit sys

atheris.Setup(
    sys.argv,
    TestOneInput,
    custom_mutator=json_mutator,
    custom_crossover=json_crossover,
)
atheris.Fuzz()

271

```

272

273

### Constants and Special Values

274

275

Important constants used throughout the Atheris API.

276

277

```python { .api }

278

ALL_REMAINING: int

279

280

def path() -> str:
    """
    Get the path to the Atheris installation directory.

    This is an API signature stub; it has no implementation here.

    Returns:
        str: Path to the directory containing Atheris files
    """

287

```

288

289

The `ALL_REMAINING` constant is used with FuzzedDataProvider methods to consume all remaining bytes:

290

291

```python

292

def TestOneInput(data):
    """Split the fuzzer input into a fixed-size header and a payload.

    Args:
        data (bytes): Raw fuzzer input.

    ``process_message`` is not defined in this example; it stands in for
    the code under test.
    """
    fdp = atheris.FuzzedDataProvider(data)

    # Extract fixed-size header
    header = fdp.ConsumeBytes(10)

    # Use all remaining data as payload
    payload = fdp.ConsumeBytes(atheris.ALL_REMAINING)

    process_message(header, payload)

302

```

303

304

### Coverage Visualization

305

306

Atheris is compatible with Python's `coverage.py` for analyzing code coverage:

307

308

```bash

309

# Run fuzzer with coverage tracking

310

python3 -m coverage run fuzzer.py -atheris_runs=10000

311

312

# Generate HTML coverage report

313

python3 -m coverage html

314

315

# View report

316

cd htmlcov && python3 -m http.server 8000

317

```

318

319

**Coverage Integration Example:**

320

321

```python

322

import atheris

323

import sys

324

325

# Import the target inside the instrumentation context so that it is
# instrumented as it is loaded.
with atheris.instrument_imports():
    import target_module


def TestOneInput(data):
    """Fuzz entry point: hand the raw input to the target's parser."""
    target_module.parse(data)


if __name__ == "__main__":
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()

334

```

335

336

### Native Extension Fuzzing

337

338

For fuzzing native C/C++ extensions, additional build configuration is required:

339

340

```python

341

# Your extension must be built with appropriate compiler flags

342

# See native_extension_fuzzing.md in the Atheris documentation

343

344

def TestNativeExtension(data):
    """Fuzz entry point for a native extension module.

    Args:
        data (bytes): Raw fuzzer input passed to ``native_module.parse_data``.

    Returns:
        None. The input is skipped when ``native_module`` cannot be imported.
    """
    try:
        import native_module
    except ImportError:
        # Skip if native module not available
        return

    # Called outside the try block so that an ImportError raised by the
    # target itself is reported instead of being mistaken for a missing
    # module and silently ignored.
    native_module.parse_data(data)

351

352

atheris.Setup(sys.argv, TestNativeExtension, internal_libfuzzer=False)

353

atheris.Fuzz()

354

```

355

356

### Integration with OSS-Fuzz

357

358

Atheris is fully supported by OSS-Fuzz for continuous fuzzing:

359

360

```python

361

#!/usr/bin/python3

362

# Typical OSS-Fuzz integration structure

363

364

import atheris

365

import sys

366

import os

367

368

# Add project-specific paths

369

sys.path.insert(0, os.path.dirname(__file__))

370

371

# Import the target inside the instrumentation context so that it is
# instrumented as it is loaded.
with atheris.instrument_imports():
    import target_project


def TestOneInput(data):
    """Fuzz entry point: run the target and ignore its expected exception."""
    try:
        target_project.fuzz_target(data)
    except target_project.ExpectedException:
        # Don't report expected exceptions as crashes
        pass


def main():
    """Register the entry point and start fuzzing."""
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()


if __name__ == "__main__":
    main()

387

```

388

389

### Performance Optimization

390

391

Tips for optimizing fuzzer performance:

392

393

```python

394

# Minimize work in TestOneInput for faster execution

395

def TestOneInput(data):
    """Fuzz entry point that dispatches on a one-byte message type.

    Args:
        data (bytes): Raw fuzzer input; inputs shorter than 4 bytes are
            ignored.

    ``handle_type1`` and ``handle_type2`` are not defined in this example;
    they stand in for the code under test.
    """
    # Early exit for obviously invalid input
    if len(data) < 4:
        return

    # Use structured input when possible
    fdp = atheris.FuzzedDataProvider(data)
    message_type = fdp.ConsumeInt(1)

    # Route to specific handlers
    if message_type == 1:
        handle_type1(fdp)
    elif message_type == 2:
        handle_type2(fdp)
    # ...

410

411

# Use timeouts for operations that might hang

412

atheris.Setup(sys.argv, TestOneInput)

413

# Run with: python fuzzer.py -timeout=5

414

atheris.Fuzz()

415

```