CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-lief

Library to instrument executable formats including ELF, PE, Mach-O, and Android formats

Pending
Overview
Eval results
Files

docs/debug-info.md

Debug Information

Advanced debug information parsing for DWARF and PDB formats enabling source-level analysis, debugging support, and program understanding. Debug information bridges the gap between compiled machine code and original source code.

Capabilities

Debug Format Detection

Identify and access debug information embedded in binaries.

class DebugInfo:
    format: FORMAT
    
    def has_debug_info(self) -> bool:
        """Check if binary contains debug information."""

enum FORMAT:
    UNKNOWN = 0
    DWARF = 1
    PDB = 2

# Access debug info through Binary.debug_info property
binary.debug_info: DebugInfo

Usage example:

import lief

binary = lief.parse("/usr/bin/gcc")

# lief.parse() returns None when the file cannot be parsed, so guard before
# touching .debug_info instead of failing with AttributeError.
debug_info = binary.debug_info if binary is not None else None

# Check for debug information
if debug_info:
    debug_format = debug_info.format
    print(f"Debug format: {debug_format}")

    if debug_format == lief.DebugInfo.FORMAT.DWARF:
        print("DWARF debug information detected")
    elif debug_format == lief.DebugInfo.FORMAT.PDB:
        print("PDB debug information detected")
else:
    print("No debug information found")

DWARF Debug Information

Comprehensive DWARF (Debugging With Attributed Record Formats) support for Unix-like systems.

# DWARF parsing available through lief.dwarf module
import lief.dwarf as DWARF

class CompilationUnit:
    """DWARF compilation unit containing debug information for a source file."""
    language: DW_LANG
    name: str
    producer: str
    low_address: int
    high_address: int
    
    def find_function(self, name: str) -> Optional[Function]
    def find_variable(self, name: str) -> Optional[Variable]
    def functions(self) -> Iterator[Function]
    def variables(self) -> Iterator[Variable]
    def types(self) -> Iterator[Type]

class Function:
    """DWARF function debug information."""
    name: str
    linkage_name: str
    address: range_t
    file: str
    line: int
    type: Optional[Type]
    parameters: Iterator[Parameter]
    variables: Iterator[Variable]
    
    def scope(self) -> Scope

class Variable:
    """DWARF variable debug information."""
    name: str
    linkage_name: str
    address: Optional[int]
    file: str
    line: int
    type: Optional[Type]
    scope: Scope

class Type:
    """DWARF type information."""
    name: str
    size: int
    kind: TYPE_KIND
    
enum TYPE_KIND:
    UNKNOWN = 0
    UNSPECIFIED = 1
    ADDRESS = 2
    BOOLEAN = 3
    COMPLEX_FLOAT = 4
    FLOAT = 5
    SIGNED = 6
    SIGNED_CHAR = 7
    UNSIGNED = 8
    UNSIGNED_CHAR = 9
    IMAGINARY_FLOAT = 10
    PACKED_DECIMAL = 11
    NUMERIC_STRING = 12
    EDITED = 13
    SIGNED_FIXED = 14
    UNSIGNED_FIXED = 15
    DECIMAL_FLOAT = 16
    UTF = 17

enum DW_LANG:
    C89 = 1
    C = 2
    Ada83 = 3
    C_plus_plus = 4
    Cobol74 = 5
    Cobol85 = 6
    Fortran77 = 7
    Fortran90 = 8
    Pascal83 = 9
    Modula2 = 10
    Java = 11
    C99 = 12
    Ada95 = 13
    Fortran95 = 14
    PLI = 15
    ObjC = 16
    ObjC_plus_plus = 17
    UPC = 18
    D = 19
    Python = 20
    Rust = 21
    C11 = 22
    Swift = 23
    Julia = 24
    Dylan = 25
    C_plus_plus_14 = 26
    Fortran03 = 27
    Fortran08 = 28
    RenderScript = 29

Usage example:

import lief
import lief.dwarf as DWARF

# Parse binary with DWARF debug info
binary = lief.parse("/usr/bin/debug_program")

# lief.parse() returns None when the file cannot be parsed; treat that the
# same as "no debug information" rather than raising AttributeError.
debug_info = binary.debug_info if binary is not None else None

if debug_info and debug_info.format == lief.DebugInfo.FORMAT.DWARF:
    print("Analyzing DWARF debug information...")

    # Access DWARF-specific functionality
    dwarf_info = DWARF.load(binary)

    # Iterate through compilation units
    for cu in dwarf_info.compilation_units():
        print(f"\nCompilation Unit: {cu.name}")
        print(f"Language: {cu.language}")
        print(f"Producer: {cu.producer}")
        print(f"Address range: 0x{cu.low_address:x} - 0x{cu.high_address:x}")

        # List functions in this compilation unit
        print("Functions:")
        for function in cu.functions():
            print(f"  {function.name} @ 0x{function.address.low:x}")
            print(f"    File: {function.file}:{function.line}")

            # `parameters` is documented as an iterator; an iterator object can
            # be truthy even when it yields nothing, so materialise it before
            # deciding whether to print the header.
            parameters = list(function.parameters)
            if parameters:
                print("    Parameters:")
                for param in parameters:
                    type_name = param.type.name if param.type else "unknown"
                    print(f"      {param.name}: {type_name}")

        # List global variables
        print("Variables:")
        for variable in cu.variables():
            print(f"  {variable.name}")
            # address is Optional[int]; compare against None so that a valid
            # address of 0 is still reported.
            if variable.address is not None:
                print(f"    Address: 0x{variable.address:x}")
            print(f"    Location: {variable.file}:{variable.line}")

PDB Debug Information

Microsoft PDB (Program Database) format support for Windows executables.

# PDB parsing available through lief.pdb module  
import lief.pdb as PDB

class PublicSymbol:
    """PDB public symbol information."""
    name: str
    section_id: int
    RVA: int

class CompilationUnit:
    """PDB compilation unit (module)."""
    module_name: str
    object_filename: str
    
    def sources(self) -> Iterator[str]
    def functions(self) -> Iterator[Function]

class Function:
    """PDB function debug information."""
    name: str
    RVA: int
    size: int
    section_id: int
    
    def debug_location(self) -> debug_location_t

class Type:
    """PDB type information."""
    pass

# Main PDB interface
class PDB:
    age: int
    guid: str
    
    def compilation_units(self) -> Iterator[CompilationUnit]
    def public_symbols(self) -> Iterator[PublicSymbol]
    def functions(self) -> Iterator[Function]
    def types(self) -> Iterator[Type]

Usage example:

import lief
import lief.pdb as PDB

# Load the Windows executable whose PDB data we want to inspect
binary = lief.PE.parse("C:\\Program Files\\App\\app.exe")

# Only continue when debug info is present and its format is PDB
if binary.debug_info:
    if binary.debug_info.format == lief.DebugInfo.FORMAT.PDB:
        print("Analyzing PDB debug information...")

        # Hand the binary over to the PDB-specific loader
        pdb_info = PDB.load(binary)

        print(f"PDB GUID: {pdb_info.guid}")
        print(f"PDB Age: {pdb_info.age}")

        # Every public symbol with its relative virtual address
        print("\nPublic symbols:")
        for symbol in pdb_info.public_symbols():
            print(f"  {symbol.name} @ RVA 0x{symbol.RVA:x}")

        # One entry per compilation unit (module)
        print("\nCompilation units:")
        for cu in pdb_info.compilation_units():
            print(f"  Module: {cu.module_name}")
            print(f"  Object: {cu.object_filename}")

            # Source files: header only when there is at least one
            sources = list(cu.sources())
            if sources:
                print("    Sources:")
            for source in sources:
                print(f"      {source}")

            # Functions: show at most the first five, then summarise the rest
            functions = list(cu.functions())
            if not functions:
                continue
            print("    Functions:")
            for position, func in enumerate(functions):
                if position >= 5:
                    break
                print(f"      {func.name} @ RVA 0x{func.RVA:x}")
            if functions[5:]:
                print(f"      ... and {len(functions) - 5} more")

Source Code Mapping

Map machine code addresses back to source code locations using debug information.

# Source location record: pairs a source file with a line number.
class debug_location_t:
    line: int  # line number within `file`
    file: str  # source file (presumably a path; confirm absolute vs. relative)
    
def addr_to_line(binary, address: int) -> Optional[debug_location_t]:
    """
    Map machine code address to source location.

    NOTE(review): only the signature and contract are listed here; no
    implementation body is shown, so as written this stub returns None.
    
    Args:
        binary: Binary with debug information
        address: Machine code address
        
    Returns:
        Source location (a debug_location_t carrying ``file`` and ``line``)
        or None if not found
    """

Usage example:

def analyze_crash_address(binary_path, crash_address):
    """Analyze crash address using debug information.

    Prints the source file and line for ``crash_address`` and, for DWARF
    binaries, the name of the first function whose address range contains it.

    Args:
        binary_path: Path of the binary, passed to ``lief.parse``.
        crash_address: Machine code address at which the crash occurred.

    Returns:
        None. All results are printed to stdout.
    """
    binary = lief.parse(binary_path)
    # lief.parse() returns None for files it cannot parse; report that
    # explicitly instead of failing with AttributeError on .debug_info.
    if binary is None:
        print(f"Could not parse {binary_path}")
        return
    if not binary.debug_info:
        print("No debug information available")
        return

    # Map address to source location
    location = addr_to_line(binary, crash_address)
    if not location:
        print(f"No source location found for address 0x{crash_address:x}")
        return

    print(f"Crash at 0x{crash_address:x}:")
    print(f"  File: {location.file}")
    print(f"  Line: {location.line}")

    # The function lookup below is only available for DWARF debug info.
    if binary.debug_info.format != lief.DebugInfo.FORMAT.DWARF:
        return

    # Find containing function. next() stops at the first match across ALL
    # compilation units; a plain `break` would only leave the inner loop and
    # keep scanning (and possibly printing) in the remaining units.
    dwarf_info = DWARF.load(binary)
    containing = next(
        (
            function
            for cu in dwarf_info.compilation_units()
            for function in cu.functions()
            # NOTE(review): both bounds are treated as inclusive; confirm
            # whether range_t.high is actually an exclusive end address.
            if function.address.low <= crash_address <= function.address.high
        ),
        None,
    )
    if containing is not None:
        print(f"  Function: {containing.name}")

# Usage
analyze_crash_address("/usr/bin/crashed_program", 0x401234)

Variable and Type Analysis

Analyze program variables and data types using debug information.

def analyze_data_structures(binary):
    """Analyze data structures and types from debug info.

    Prints every struct type found in the DWARF debug information together
    with its size, followed by every variable that has an address.

    Args:
        binary: Parsed binary exposing a ``debug_info`` attribute.

    Returns:
        None. Nothing is printed when the binary has no debug information or
        the debug information is not DWARF.
    """
    if not binary.debug_info:
        return
    if binary.debug_info.format != lief.DebugInfo.FORMAT.DWARF:
        return

    dwarf_info = DWARF.load(binary)

    # Collect all types keyed by name; when several compilation units define
    # a type with the same name, the last one seen wins.
    all_types = {
        type_info.name: type_info
        for cu in dwarf_info.compilation_units()
        for type_info in cu.types()
    }

    # Analyze structure types
    print("Data structures:")
    for name, type_info in all_types.items():
        # NOTE(review): the TYPE_KIND enum listed in this document has no
        # STRUCT member; confirm the member name against the installed LIEF.
        if type_info.kind == DWARF.TYPE_KIND.STRUCT:
            print(f"  struct {name} (size: {type_info.size})")

    # Analyze global variables
    print("\nGlobal variables:")
    for cu in dwarf_info.compilation_units():
        for var in cu.variables():
            # address is Optional[int]: test against None so a variable
            # located at address 0 is not silently skipped.
            if var.address is None:
                continue
            type_name = var.type.name if var.type else "unknown"
            print(f"  {var.name}: {type_name} @ 0x{var.address:x}")

Call Stack Analysis

Analyze call stacks and function relationships using debug information.

def analyze_call_stack(binary, addresses):
    """
    Analyze call stack using debug information.

    Prints one line per frame with its source location and, for DWARF
    binaries, the name of the first function whose range contains the address.

    Args:
        binary: Binary with debug information
        addresses: List of return addresses from stack trace

    Returns:
        None. All results are printed to stdout.
    """
    print("Call stack analysis:")

    # Loaded lazily and at most once: the DWARF data does not depend on the
    # frame being inspected, so reloading it per address is wasted work.
    dwarf_info = None

    for i, addr in enumerate(addresses):
        location = addr_to_line(binary, addr)
        if not location:
            print(f"  #{i}: 0x{addr:x} (no debug info)")
            continue

        print(f"  #{i}: 0x{addr:x} in {location.file}:{location.line}")

        # Function names are only resolved for DWARF debug info.
        if binary.debug_info.format != lief.DebugInfo.FORMAT.DWARF:
            continue
        if dwarf_info is None:
            dwarf_info = DWARF.load(binary)

        # Find function name. next() stops at the first match across all
        # compilation units; a `break` in the inner loop alone would let the
        # outer loop keep scanning and print further matches.
        containing = next(
            (
                func
                for cu in dwarf_info.compilation_units()
                for func in cu.functions()
                if func.address.low <= addr <= func.address.high
            ),
            None,
        )
        if containing is not None:
            print(f"      Function: {containing.name}")

Debug Information Extraction

Extract and export debug information for external analysis tools.

def extract_debug_info(binary_path, output_format="json"):
    """
    Extract debug information from binary.

    Only DWARF debug information is expanded; for other formats the result
    carries the format name with empty lists.

    Args:
        binary_path: Path to binary file
        output_format: "json" returns a JSON string. Any other value returns
            the collected data as a plain dict (no other serialisation is
            implemented).

    Returns:
        Debug information as a JSON string or a dict, or None when the file
        cannot be parsed or contains no debug information.
    """
    binary = lief.parse(binary_path)
    # lief.parse() returns None for files it cannot parse; treat that like a
    # binary without debug information instead of raising AttributeError.
    if binary is None or not binary.debug_info:
        return None

    debug_data = {
        "format": str(binary.debug_info.format),
        "compilation_units": [],
        "functions": [],
        "variables": [],
        "types": [],
    }

    if binary.debug_info.format == lief.DebugInfo.FORMAT.DWARF:
        dwarf_info = DWARF.load(binary)

        for cu in dwarf_info.compilation_units():
            debug_data["compilation_units"].append(
                {
                    "name": cu.name,
                    "language": str(cu.language),
                    "producer": cu.producer,
                    "address_range": [cu.low_address, cu.high_address],
                }
            )

            # Extract functions
            debug_data["functions"].extend(
                {
                    "name": func.name,
                    "address": [func.address.low, func.address.high],
                    "file": func.file,
                    "line": func.line,
                }
                for func in cu.functions()
            )

            # Extract variables; address stays None (JSON null) when the
            # variable has no address.
            debug_data["variables"].extend(
                {
                    "name": var.name,
                    "address": var.address,
                    "file": var.file,
                    "line": var.line,
                }
                for var in cu.variables()
            )

            # Extract types; kind is stringified like the other enum values.
            debug_data["types"].extend(
                {
                    "name": type_info.name,
                    "size": type_info.size,
                    "kind": str(type_info.kind),
                }
                for type_info in cu.types()
            )

    if output_format == "json":
        import json

        return json.dumps(debug_data, indent=2)
    return debug_data

# Usage
debug_json = extract_debug_info("/usr/bin/program", "json")
if debug_json:
    with open("debug_info.json", "w") as f:
        f.write(debug_json)

Types

class DebugInfo:
    format: FORMAT
    
enum FORMAT:
    UNKNOWN = 0
    DWARF = 1
    PDB = 2

class debug_location_t:
    line: int
    file: str

# DWARF-specific types
class range_t:
    low: int
    high: int
    size: int

enum Scope:
    GLOBAL = 0
    LOCAL = 1
    PARAMETER = 2

class Parameter:
    name: str
    type: Optional[Type]
    
# PDB-specific types  
class GUID:
    data1: int
    data2: int  
    data3: int
    data4: bytes

# Common debug information interfaces
class SourceFile:
    path: str
    directory: str
    
class LineEntry:
    address: int
    file: SourceFile
    line: int
    column: int
    
class InlineInfo:
    call_file: SourceFile
    call_line: int
    call_column: int
    callee: Function

Install with Tessl CLI

npx tessl i tessl/pypi-lief

docs

android-formats.md

assembly-engine.md

core-operations.md

debug-info.md

elf-format.md

extended-features.md

index.md

macho-format.md

pe-format.md

tile.json