CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-atheris

A coverage-guided fuzzer for Python and Python extensions based on libFuzzer

91

1.28x
Overview
Eval results
Files

docs/data-provider.md

Data Provider

The FuzzedDataProvider converts raw fuzzer bytes into structured data types, enabling more effective testing of functions that expect specific input formats rather than arbitrary byte sequences.

Capabilities

Basic Construction

Create a FuzzedDataProvider instance from raw fuzzer input.

class FuzzedDataProvider:
    """Convert raw fuzzer bytes into structured values of various data types.

    An instance wraps the byte string handed to the fuzz test function so
    that typed values can be extracted from it.
    """

    def __init__(self, input_bytes: bytes):
        """Initialize the data provider with fuzzer input.

        Args:
            input_bytes (bytes): Raw bytes from the fuzzer.
        """

Usage Example:

def TestOneInput(data):
    """Fuzz entry point: wrap the raw fuzzer bytes in a FuzzedDataProvider."""
    provider = atheris.FuzzedDataProvider(data)
    # From here on, pull structured values out of `provider`.

Byte Consumption

Extract raw bytes and strings from the fuzzer input.

def ConsumeBytes(self, count: int) -> bytes:
    """Consume up to ``count`` bytes of the remaining input.

    Args:
        count (int): Number of bytes to consume. When this is
            ALL_REMAINING, all remaining bytes are consumed.

    Returns:
        bytes: ``count`` bytes, or fewer if insufficient data remains.
    """

def ConsumeUnicode(self, count: int) -> str:
    """Consume a unicode string that may contain surrogate pair characters.

    Args:
        count (int): Number of characters to consume. When this is
            ALL_REMAINING, all remaining characters are consumed.

    Returns:
        str: Unicode string that may contain surrogate pair characters.
    """

def ConsumeUnicodeNoSurrogates(self, count: int) -> str:
    """Consume a unicode string that contains no surrogate pair characters.

    Args:
        count (int): Number of characters to consume.

    Returns:
        str: Unicode string without surrogate pair characters.
    """

def ConsumeString(self, count: int) -> str:
    """Consume a string; in Python 3 this is an alias for ConsumeUnicode.

    Args:
        count (int): Number of characters to consume.

    Returns:
        str: String of the requested length.
    """

Usage Examples:

def TestOneInput(data):
    """Split the fuzzer input into a binary header and a UTF-8 text body."""
    provider = atheris.FuzzedDataProvider(data)

    # Raw bytes are the natural fit for a binary protocol header.
    header_bytes = provider.ConsumeBytes(8)

    # Surrogate-free text avoids encoding issues when converting to UTF-8.
    body_text = provider.ConsumeUnicodeNoSurrogates(50)
    body_bytes = body_text.encode('utf-8')

    # Hand both pieces to the code under test.
    process_binary_message(header_bytes, body_bytes)

Integer Consumption

Extract integers of various sizes and ranges.

def ConsumeInt(self, byte_size: int) -> int:
    """Consume a signed integer that is ``byte_size`` bytes wide.

    Args:
        byte_size (int): Size in bytes (1, 2, 4, or 8).

    Returns:
        int: Signed integer using two's complement representation.
    """

def ConsumeUInt(self, byte_size: int) -> int:
    """Consume an unsigned integer that is ``byte_size`` bytes wide.

    Args:
        byte_size (int): Size in bytes (1, 2, 4, or 8).

    Returns:
        int: Unsigned integer (always non-negative).
    """

def ConsumeIntInRange(self, min_val: int, max_val: int) -> int:
    """Consume an integer in the closed range [min_val, max_val].

    Args:
        min_val (int): Minimum value (inclusive).
        max_val (int): Maximum value (inclusive).

    Returns:
        int: Integer in the range [min_val, max_val].
    """

def ConsumeIntList(self, count: int, byte_size: int) -> list:
    """Consume a list of signed integers, each ``byte_size`` bytes wide.

    Args:
        count (int): Number of integers to consume.
        byte_size (int): Size of each integer in bytes.

    Returns:
        list: List of signed integers.
    """

def ConsumeIntListInRange(self, count: int, min_val: int, max_val: int) -> list:
    """Consume a list of integers that each lie within a range.

    NOTE(review): the bounds are presumably inclusive, as documented for
    ConsumeIntInRange -- confirm against the Atheris reference.

    Args:
        count (int): Number of integers to consume.
        min_val (int): Minimum value for each integer.
        max_val (int): Maximum value for each integer.

    Returns:
        list: List of integers in the specified range.
    """

Usage Examples:

def TestOneInput(data):
    """Build a fuzzer-sized 2D array and pass it to the code under test."""
    provider = atheris.FuzzedDataProvider(data)

    # Both dimensions are drawn from 1..1000.
    width = provider.ConsumeIntInRange(1, 1000)
    height = provider.ConsumeIntInRange(1, 1000)

    # One bounded cell value per array position.
    cell_count = width * height
    cells = provider.ConsumeIntListInRange(cell_count, -100, 100)

    # Exercise the target with the structured input.
    test_2d_array(cells, width, height)

Float Consumption

Extract floating-point numbers with various constraints.

def ConsumeFloat(self) -> float:
    """Consume an arbitrary floating-point value.

    The result may be a special value such as NaN, Inf, or -Inf, or a very
    large or very small number.

    Returns:
        float: Arbitrary floating-point value.
    """

def ConsumeRegularFloat(self) -> float:
    """Consume a numeric floating-point value.

    Never produces NaN or Inf; only normal numeric values are returned.

    Returns:
        float: Normal floating-point number.
    """

def ConsumeProbability(self) -> float:
    """Consume a probability value between 0 and 1.

    Returns:
        float: Value in the range [0.0, 1.0].
    """

def ConsumeFloatInRange(self, min_val: float, max_val: float) -> float:
    """Consume a floating-point value in the closed range [min_val, max_val].

    Args:
        min_val (float): Minimum value (inclusive).
        max_val (float): Maximum value (inclusive).

    Returns:
        float: Value in the range [min_val, max_val].
    """

def ConsumeFloatList(self, count: int) -> list:
    """Consume a list of arbitrary floats.

    Args:
        count (int): Number of floats to consume.

    Returns:
        list: List of floats (may include NaN/Inf).
    """

def ConsumeRegularFloatList(self, count: int) -> list:
    """Consume a list of normal floats.

    Args:
        count (int): Number of floats to consume.

    Returns:
        list: List of normal floating-point numbers.
    """

def ConsumeProbabilityList(self, count: int) -> list:
    """Consume a list of probability values.

    Args:
        count (int): Number of probabilities to consume.

    Returns:
        list: List of floats in [0.0, 1.0].
    """

def ConsumeFloatListInRange(self, count: int, min_val: float, max_val: float) -> list:
    """Consume a list of floats that each lie within a range.

    NOTE(review): the bounds are presumably inclusive, as documented for
    ConsumeFloatInRange -- confirm against the Atheris reference.

    Args:
        count (int): Number of floats to consume.
        min_val (float): Minimum value for each float.
        max_val (float): Maximum value for each float.

    Returns:
        list: List of floats in the specified range.
    """

Boolean and Choice Operations

Extract boolean values, make choices from collections, and inspect the input that has not yet been consumed.

def ConsumeBool(self) -> bool:
    """Consume a boolean value.

    Returns:
        bool: Either True or False.
    """

def PickValueInList(self, values: list):
    """Pick a random value from a list.

    Args:
        values (list): List of values to choose from.

    Returns:
        Any: Random element from the list, or None if the list is empty.
    """

def remaining_bytes(self) -> int:
    """Return the number of bytes still available for consumption.

    Returns:
        int: Number of bytes remaining in the input buffer.
    """

def buffer(self) -> bytes:
    """Return the entire remaining buffer.

    NOTE(review): this page does not state whether calling this method
    consumes the returned bytes or leaves them available -- confirm.

    Returns:
        bytes: All remaining bytes in the input buffer.
    """

Usage Examples:

def TestOneInput(data):
    """Derive an Algorithm configuration and payload from the fuzzer input."""
    provider = atheris.FuzzedDataProvider(data)

    # Configuration switches.
    feature_on = provider.ConsumeBool()
    chosen_mode = provider.PickValueInList(['fast', 'balanced', 'accurate'])

    # Numeric tuning parameters.
    cutoff = provider.ConsumeProbability()
    weight_values = provider.ConsumeRegularFloatList(10)

    # Decide how to split whatever input is left.
    if provider.remaining_bytes() <= 100:
        # Small input: everything that is left becomes the payload.
        payload = provider.buffer()
        extra_data = b""
    else:
        # Large input: fixed-size payload, the rest is passed as extra data.
        payload = provider.ConsumeBytes(100)
        extra_data = provider.buffer()

    # Run the target with the generated configuration.
    target = Algorithm(feature_on, chosen_mode, cutoff, weight_values)
    target.process(payload, extra_data)

Consuming All Remaining Data

Pass the special constant `atheris.ALL_REMAINING` as the `count` argument to consume all of the remaining input.

# Import the constant
from atheris import ALL_REMAINING

# Usage in methods that accept a count parameter
def ConsumeBytes(self, count: int) -> bytes:
    """Consume all remaining bytes when ``count`` is ALL_REMAINING."""

def ConsumeUnicode(self, count: int) -> str:
    """Consume all remaining characters when ``count`` is ALL_REMAINING."""

Usage Example:

def TestOneInput(data):
    """Parse two header fields, then treat the rest of the input as payload."""
    provider = atheris.FuzzedDataProvider(data)

    # Header: a 1-byte version followed by a 2-byte flags field.
    version = provider.ConsumeInt(1)
    flags = provider.ConsumeInt(2)

    # Everything left over is the message body.
    body = provider.ConsumeBytes(atheris.ALL_REMAINING)

    process_message(version, flags, body)

Complete Example

import atheris
import sys

def TestJSONLikeData(data):
    """Assemble a JSON-like dict from fuzzer input and pass it to the target."""
    provider = atheris.FuzzedDataProvider(data)

    # The structured object is filled in field by field.
    record = {}

    # Optional string fields: each is present only if the fuzzer says so.
    include_name = provider.ConsumeBool()
    if include_name:
        record['name'] = provider.ConsumeUnicodeNoSurrogates(20)

    include_description = provider.ConsumeBool()
    if include_description:
        record['description'] = provider.ConsumeUnicodeNoSurrogates(100)

    # Mandatory numeric fields.
    record['version'] = provider.ConsumeIntInRange(1, 10)
    record['score'] = provider.ConsumeFloatInRange(0.0, 100.0)

    # A short list of bounded integers (between 0 and 10 items).
    item_count = provider.ConsumeIntInRange(0, 10)
    record['items'] = provider.ConsumeIntListInRange(item_count, 1, 1000)

    # Exercise the code under test.
    process_data_structure(record)

# Register TestJSONLikeData as the fuzz target, passing the command-line arguments.
atheris.Setup(sys.argv, TestJSONLikeData)
# Start fuzzing with the registered target.
atheris.Fuzz()

Install with Tessl CLI

npx tessl i tessl/pypi-atheris

docs

advanced-features.md

core-fuzzing.md

data-provider.md

index.md

instrumentation.md

tile.json