0
# Data Provider
1
2
The FuzzedDataProvider converts raw fuzzer bytes into structured data types, enabling more effective testing of functions that expect specific input formats rather than arbitrary byte sequences.
3
4
## Capabilities
5
6
### Basic Construction
7
8
Create a FuzzedDataProvider instance from raw fuzzer input.
9
10
```python { .api }
11
class FuzzedDataProvider:
12
"""Converts raw fuzzer bytes into various data types."""
13
14
def __init__(self, input_bytes: bytes):
15
"""
16
Initialize the data provider with fuzzer input.
17
18
Args:
19
input_bytes (bytes): Raw bytes from the fuzzer
20
"""
21
```
22
23
**Usage Example:**
24
25
```python
26
def TestOneInput(data):
27
fdp = atheris.FuzzedDataProvider(data)
28
# Now use fdp to extract structured data
29
```
30
31
### Byte Consumption
32
33
Extract raw bytes and strings from the fuzzer input.
34
35
```python { .api }
36
def ConsumeBytes(self, count: int) -> bytes:
37
"""
38
Consume up to count bytes (fewer are returned if the input runs out).
39
40
Args:
41
count (int): Number of bytes to consume
42
43
Returns:
44
bytes: Exactly count bytes, or fewer if insufficient data remains
45
"""
46
47
def ConsumeUnicode(self, count: int) -> str:
48
"""
49
Consume unicode characters that may contain surrogate pairs.
50
51
Args:
52
count (int): Number of characters to consume
53
54
Returns:
55
str: Unicode string that may contain surrogate pair characters
56
"""
57
58
def ConsumeUnicodeNoSurrogates(self, count: int) -> str:
59
"""
60
Consume unicode characters without surrogate pairs.
61
62
Args:
63
count (int): Number of characters to consume
64
65
Returns:
66
str: Unicode string without surrogate pair characters
67
"""
68
69
def ConsumeString(self, count: int) -> str:
70
"""
71
Consume a string (alias for ConsumeUnicode in Python 3).
72
73
Args:
74
count (int): Number of characters to consume
75
76
Returns:
77
str: String of up to count characters (may be shorter if insufficient data remains)
78
"""
79
```
80
81
**Usage Examples:**
82
83
```python
84
def TestOneInput(data):
85
fdp = atheris.FuzzedDataProvider(data)
86
87
# Get raw bytes for binary protocols
88
header = fdp.ConsumeBytes(8)
89
90
# Get text data without encoding issues
91
text = fdp.ConsumeUnicodeNoSurrogates(50)
92
93
# Process both
94
process_binary_message(header, text.encode('utf-8'))
95
```
96
97
### Integer Consumption
98
99
Extract integers of various sizes and ranges.
100
101
```python { .api }
102
def ConsumeInt(self, byte_size: int) -> int:
103
"""
104
Consume a signed integer of specified byte size.
105
106
Args:
107
byte_size (int): Size of the integer in bytes (e.g. 1, 2, 4, or 8)
108
109
Returns:
110
int: Signed integer using two's complement representation
111
"""
112
113
def ConsumeUInt(self, byte_size: int) -> int:
114
"""
115
Consume an unsigned integer of specified byte size.
116
117
Args:
118
byte_size (int): Size of the integer in bytes (e.g. 1, 2, 4, or 8)
119
120
Returns:
121
int: Unsigned integer (always non-negative)
122
"""
123
124
def ConsumeIntInRange(self, min_val: int, max_val: int) -> int:
125
"""
126
Consume an integer within a specific range.
127
128
Args:
129
min_val (int): Minimum value (inclusive)
130
max_val (int): Maximum value (inclusive)
131
132
Returns:
133
int: Integer in the range [min_val, max_val]
134
"""
135
136
def ConsumeIntList(self, count: int, byte_size: int) -> list:
137
"""
138
Consume a list of signed integers.
139
140
Args:
141
count (int): Number of integers to consume
142
byte_size (int): Size of each integer in bytes
143
144
Returns:
145
list: List of signed integers
146
"""
147
148
def ConsumeIntListInRange(self, count: int, min_val: int, max_val: int) -> list:
149
"""
150
Consume a list of integers within a range.
151
152
Args:
153
count (int): Number of integers to consume
154
min_val (int): Minimum value for each integer (inclusive)
155
max_val (int): Maximum value for each integer (inclusive)
156
157
Returns:
158
list: List of integers in the specified range
159
"""
160
```
161
162
**Usage Examples:**
163
164
```python
165
def TestOneInput(data):
166
fdp = atheris.FuzzedDataProvider(data)
167
168
# Get array dimensions
169
width = fdp.ConsumeIntInRange(1, 1000)
170
height = fdp.ConsumeIntInRange(1, 1000)
171
172
# Get array data
173
values = fdp.ConsumeIntListInRange(width * height, -100, 100)
174
175
# Test with structured data
176
test_2d_array(values, width, height)
177
```
178
179
### Float Consumption
180
181
Extract floating-point numbers with various constraints.
182
183
```python { .api }
184
def ConsumeFloat(self) -> float:
185
"""
186
Consume an arbitrary floating-point value.
187
188
May produce special values like NaN, Inf, -Inf, and very large/small numbers.
189
190
Returns:
191
float: Arbitrary floating-point value
192
"""
193
194
def ConsumeRegularFloat(self) -> float:
195
"""
196
Consume a numeric floating-point value.
197
198
Never produces NaN or Inf - only finite numeric values.
199
200
Returns:
201
float: Finite floating-point number (never NaN or Inf)
202
"""
203
204
def ConsumeProbability(self) -> float:
205
"""
206
Consume a probability value between 0 and 1.
207
208
Returns:
209
float: Value in the range [0.0, 1.0]
210
"""
211
212
def ConsumeFloatInRange(self, min_val: float, max_val: float) -> float:
213
"""
214
Consume a floating-point value within a range.
215
216
Args:
217
min_val (float): Minimum value (inclusive)
218
max_val (float): Maximum value (inclusive)
219
220
Returns:
221
float: Value in the range [min_val, max_val]
222
"""
223
224
def ConsumeFloatList(self, count: int) -> list:
225
"""
226
Consume a list of arbitrary floats.
227
228
Args:
229
count (int): Number of floats to consume
230
231
Returns:
232
list: List of floats (may include NaN/Inf)
233
"""
234
235
def ConsumeRegularFloatList(self, count: int) -> list:
236
"""
237
Consume a list of finite floats (never NaN or Inf).
238
239
Args:
240
count (int): Number of floats to consume
241
242
Returns:
243
list: List of finite floating-point numbers (never NaN or Inf)
244
"""
245
246
def ConsumeProbabilityList(self, count: int) -> list:
247
"""
248
Consume a list of probability values.
249
250
Args:
251
count (int): Number of probabilities to consume
252
253
Returns:
254
list: List of floats in [0.0, 1.0]
255
"""
256
257
def ConsumeFloatListInRange(self, count: int, min_val: float, max_val: float) -> list:
258
"""
259
Consume a list of floats within a range.
260
261
Args:
262
count (int): Number of floats to consume
263
min_val (float): Minimum value for each float (inclusive)
264
max_val (float): Maximum value for each float (inclusive)
265
266
Returns:
267
list: List of floats in the specified range
268
"""
269
```
270
271
### Boolean and Choice Operations
272
273
Extract boolean values and make choices from collections.
274
275
```python { .api }
276
def ConsumeBool(self) -> bool:
277
"""
278
Consume a boolean value.
279
280
Returns:
281
bool: Either True or False
282
"""
283
284
def PickValueInList(self, values: list):
285
"""
286
Pick a value from a list; the choice is driven by the fuzzer input rather than a random number generator.
287
288
Args:
289
values (list): List of values to choose from
290
291
Returns:
292
Any: Random element from the list, or None if list is empty
293
"""
294
295
def remaining_bytes(self) -> int:
296
"""
297
Get the number of remaining bytes available for consumption.
298
299
Returns:
300
int: Number of bytes remaining in the input buffer
301
"""
302
303
def buffer(self) -> bytes:
304
"""
305
Get the entire remaining buffer.
306
307
Returns:
308
bytes: All remaining bytes in the input buffer
309
"""
310
```
311
312
**Usage Examples:**
313
314
```python
315
def TestOneInput(data):
316
fdp = atheris.FuzzedDataProvider(data)
317
318
# Make configuration choices
319
enable_feature = fdp.ConsumeBool()
320
mode = fdp.PickValueInList(['fast', 'balanced', 'accurate'])
321
322
# Get numeric parameters
323
threshold = fdp.ConsumeProbability()
324
weights = fdp.ConsumeRegularFloatList(10)
325
326
# Check remaining data
327
if fdp.remaining_bytes() > 100:
328
# Process large payload
329
payload = fdp.ConsumeBytes(100)
330
extra_data = fdp.buffer() # Get all remaining bytes
331
else:
332
# Use all remaining data
333
payload = fdp.buffer()
334
extra_data = b""
335
336
# Test with configuration
337
algorithm = Algorithm(enable_feature, mode, threshold, weights)
338
algorithm.process(payload, extra_data)
339
```
340
341
### Consuming All Remaining Data
342
343
Pass the special constant `atheris.ALL_REMAINING` as the `count` argument to consume all remaining input.
344
345
```python { .api }
346
# Import the constant
347
from atheris import ALL_REMAINING
348
349
# Usage in methods that accept a count parameter
350
def ConsumeBytes(self, count: int) -> bytes:
351
"""When count is ALL_REMAINING, consumes all remaining bytes."""
352
353
def ConsumeUnicode(self, count: int) -> str:
354
"""When count is ALL_REMAINING, consumes all remaining characters."""
355
```
356
357
**Usage Example:**
358
359
```python
360
def TestOneInput(data):
361
fdp = atheris.FuzzedDataProvider(data)
362
363
# Extract header fields
364
version = fdp.ConsumeInt(1)
365
flags = fdp.ConsumeInt(2)
366
367
# Use remaining data as payload
368
payload = fdp.ConsumeBytes(atheris.ALL_REMAINING)
369
370
process_message(version, flags, payload)
371
```
372
373
### Complete Example
374
375
```python
376
import atheris
377
import sys
378
379
def TestJSONLikeData(data):
380
fdp = atheris.FuzzedDataProvider(data)
381
382
# Build a structured object from fuzzer data
383
obj = {}
384
385
# Add some string fields
386
if fdp.ConsumeBool():
387
obj['name'] = fdp.ConsumeUnicodeNoSurrogates(20)
388
389
if fdp.ConsumeBool():
390
obj['description'] = fdp.ConsumeUnicodeNoSurrogates(100)
391
392
# Add numeric fields
393
obj['version'] = fdp.ConsumeIntInRange(1, 10)
394
obj['score'] = fdp.ConsumeFloatInRange(0.0, 100.0)
395
396
# Add array data
397
array_size = fdp.ConsumeIntInRange(0, 10)
398
obj['items'] = fdp.ConsumeIntListInRange(array_size, 1, 1000)
399
400
# Test the object
401
process_data_structure(obj)
402
403
atheris.Setup(sys.argv, TestJSONLikeData)
404
atheris.Fuzz()
405
```