Library to instrument executable formats including ELF, PE, Mach-O, and Android formats
—
Advanced debug information parsing for DWARF and PDB formats enabling source-level analysis, debugging support, and program understanding. Debug information bridges the gap between compiled machine code and original source code.
Identify and access debug information embedded in binaries.
class DebugInfo:
format: FORMAT
def has_debug_info(self) -> bool:
"""Check if binary contains debug information."""
enum FORMAT:
UNKNOWN = 0
DWARF = 1
PDB = 2
# Access debug info through Binary.debug_info property
binary.debug_info: DebugInfo

Usage example:
import lief

# lief.parse returns None when the file cannot be parsed, so guard before
# touching any attribute of the result.
binary = lief.parse("/usr/bin/gcc")
debug_info = binary.debug_info if binary is not None else None

# Check for debug information
if debug_info:
    print(f"Debug format: {debug_info.format}")
    if debug_info.format == lief.DebugInfo.FORMAT.DWARF:
        print("DWARF debug information detected")
    elif debug_info.format == lief.DebugInfo.FORMAT.PDB:
        print("PDB debug information detected")
else:
    print("No debug information found")

# Comprehensive DWARF (Debugging With Attributed Record Formats) support for Unix-like systems.
# DWARF parsing available through lief.dwarf module
import lief.dwarf as DWARF
class CompilationUnit:
"""DWARF compilation unit containing debug information for a source file."""
language: DW_LANG
name: str
producer: str
low_address: int
high_address: int
def find_function(self, name: str) -> Optional[Function]
def find_variable(self, name: str) -> Optional[Variable]
def functions(self) -> Iterator[Function]
def variables(self) -> Iterator[Variable]
def types(self) -> Iterator[Type]
class Function:
"""DWARF function debug information."""
name: str
linkage_name: str
address: range_t
file: str
line: int
type: Optional[Type]
parameters: Iterator[Parameter]
variables: Iterator[Variable]
def scope(self) -> Scope
class Variable:
"""DWARF variable debug information."""
name: str
linkage_name: str
address: Optional[int]
file: str
line: int
type: Optional[Type]
scope: Scope
class Type:
"""DWARF type information."""
name: str
size: int
kind: TYPE_KIND
enum TYPE_KIND:
UNKNOWN = 0
UNSPECIFIED = 1
ADDRESS = 2
BOOLEAN = 3
COMPLEX_FLOAT = 4
FLOAT = 5
SIGNED = 6
SIGNED_CHAR = 7
UNSIGNED = 8
UNSIGNED_CHAR = 9
IMAGINARY_FLOAT = 10
PACKED_DECIMAL = 11
NUMERIC_STRING = 12
EDITED = 13
SIGNED_FIXED = 14
UNSIGNED_FIXED = 15
DECIMAL_FLOAT = 16
UTF = 17
enum DW_LANG:
C89 = 1
C = 2
Ada83 = 3
C_plus_plus = 4
Cobol74 = 5
Cobol85 = 6
Fortran77 = 7
Fortran90 = 8
Pascal83 = 9
Modula2 = 10
Java = 11
C99 = 12
Ada95 = 13
Fortran95 = 14
PLI = 15
ObjC = 16
ObjC_plus_plus = 17
UPC = 18
D = 19
Python = 20
Rust = 21
C11 = 22
Swift = 23
Julia = 24
Dylan = 25
C_plus_plus_14 = 26
Fortran03 = 27
Fortran08 = 28
RenderScript = 29

Usage example:
import lief
import lief.dwarf as DWARF

# Parse binary with DWARF debug info
binary = lief.parse("/usr/bin/debug_program")
# lief.parse returns None on failure; treat that like "no debug info".
debug_info = binary.debug_info if binary is not None else None

if debug_info and debug_info.format == lief.DebugInfo.FORMAT.DWARF:
    print("Analyzing DWARF debug information...")

    # Access DWARF-specific functionality
    dwarf_info = DWARF.load(binary)

    # Iterate through compilation units
    for cu in dwarf_info.compilation_units():
        print(f"\nCompilation Unit: {cu.name}")
        print(f"Language: {cu.language}")
        print(f"Producer: {cu.producer}")
        print(f"Address range: 0x{cu.low_address:x} - 0x{cu.high_address:x}")

        # List functions in this compilation unit
        print("Functions:")
        for function in cu.functions():
            print(f" {function.name} @ 0x{function.address.low:x}")
            print(f" File: {function.file}:{function.line}")

            # List function parameters. `parameters` is listed as an
            # iterator, whose truthiness does not reflect emptiness, so
            # materialize it before testing (same idiom as the PDB example).
            parameters = list(function.parameters)
            if parameters:
                print(" Parameters:")
                for param in parameters:
                    type_name = param.type.name if param.type else "unknown"
                    print(f" {param.name}: {type_name}")

        # List global variables
        print("Variables:")
        for variable in cu.variables():
            print(f" {variable.name}")
            # `address` is Optional[int]; compare against None so that a
            # legitimate address of 0 is still printed.
            if variable.address is not None:
                print(f" Address: 0x{variable.address:x}")
            print(f" Location: {variable.file}:{variable.line}")

# Microsoft PDB (Program Database) format support for Windows executables.
# PDB parsing available through lief.pdb module
import lief.pdb as PDB
class PublicSymbol:
"""PDB public symbol information."""
name: str
section_id: int
RVA: int
class CompilationUnit:
"""PDB compilation unit (module)."""
module_name: str
object_filename: str
def sources(self) -> Iterator[str]
def functions(self) -> Iterator[Function]
class Function:
"""PDB function debug information."""
name: str
RVA: int
size: int
section_id: int
def debug_location(self) -> debug_location_t
class Type:
"""PDB type information."""
pass
# Main PDB interface
class PDB:
age: int
guid: str
def compilation_units(self) -> Iterator[CompilationUnit]
def public_symbols(self) -> Iterator[PublicSymbol]
def functions(self) -> Iterator[Function]
def types(self) -> Iterator[Type]

Usage example:
import lief
import lief.pdb as PDB

# Parse Windows binary with PDB debug info
binary = lief.PE.parse("C:\\Program Files\\App\\app.exe")

has_pdb = bool(
    binary.debug_info
    and binary.debug_info.format == lief.DebugInfo.FORMAT.PDB
)

if has_pdb:
    print("Analyzing PDB debug information...")

    # Access PDB-specific functionality
    pdb_info = PDB.load(binary)
    print(f"PDB GUID: {pdb_info.guid}")
    print(f"PDB Age: {pdb_info.age}")

    # Dump every public symbol with its RVA
    print("\nPublic symbols:")
    for symbol in pdb_info.public_symbols():
        print(f" {symbol.name} @ RVA 0x{symbol.RVA:x}")

    # Walk the compilation units (modules)
    print("\nCompilation units:")
    for cu in pdb_info.compilation_units():
        print(f" Module: {cu.module_name}")
        print(f" Object: {cu.object_filename}")

        # Source files contributing to this module
        sources = list(cu.sources())
        if sources:
            print(" Sources:")
            for source in sources:
                print(f" {source}")

        # Functions in this module; only a short preview is printed
        functions = list(cu.functions())
        if not functions:
            continue
        print(" Functions:")
        preview = functions[:5]  # Show first 5
        for func in preview:
            print(f" {func.name} @ RVA 0x{func.RVA:x}")
        if len(functions) > 5:
            print(f" ... and {len(functions) - 5} more")

# Map machine code addresses back to source code locations using debug information.
class debug_location_t:
line: int
file: str
def addr_to_line(binary, address: int) -> Optional[debug_location_t]:
"""
Map machine code address to source location.
Args:
binary: Binary with debug information
address: Machine code address
Returns:
Source location or None if not found
"""

Usage example:
def analyze_crash_address(binary_path, crash_address):
    """Analyze crash address using debug information.

    Prints the source file and line for ``crash_address`` and, when the
    binary carries DWARF data, the name of the first function whose address
    range contains it.

    Args:
        binary_path: Path of the binary handed to ``lief.parse``.
        crash_address: Machine code address to resolve.

    Returns:
        None. All results are printed.
    """
    binary = lief.parse(binary_path)
    # lief.parse returns None for input it cannot parse; report that the
    # same way as a binary without debug information.
    if binary is None or not binary.debug_info:
        print("No debug information available")
        return

    # Map address to source location
    location = addr_to_line(binary, crash_address)
    if not location:
        print(f"No source location found for address 0x{crash_address:x}")
        return

    print(f"Crash at 0x{crash_address:x}:")
    print(f" File: {location.file}")
    print(f" Line: {location.line}")

    # Find containing function
    if binary.debug_info.format == lief.DebugInfo.FORMAT.DWARF:
        dwarf_info = DWARF.load(binary)
        # next() stops at the first match across ALL compilation units; a
        # plain `break` in the nested loops would only leave the inner one.
        # NOTE(review): the upper bound is kept inclusive as in the original;
        # confirm whether range_t.high is an exclusive end address.
        function_name = next(
            (
                function.name
                for cu in dwarf_info.compilation_units()
                for function in cu.functions()
                if function.address.low <= crash_address <= function.address.high
            ),
            None,
        )
        if function_name is not None:
            print(f" Function: {function_name}")
# Usage
analyze_crash_address("/usr/bin/crashed_program", 0x401234)

Analyze program variables and data types using debug information.
def analyze_data_structures(binary):
    """Analyze data structures and types from debug info.

    Prints every struct type and every addressed variable found in the
    binary's DWARF data. Does nothing when the binary has no debug
    information or the format is not DWARF.

    Args:
        binary: Parsed binary exposing a ``debug_info`` attribute.

    Returns:
        None. All results are printed.
    """
    debug_info = binary.debug_info
    if not debug_info:
        return
    if debug_info.format != lief.DebugInfo.FORMAT.DWARF:
        return

    dwarf_info = DWARF.load(binary)

    # Collect all types, de-duplicated by name.
    # NOTE(review): types sharing a name (e.g. anonymous ones) overwrite
    # each other here; only the last one seen is reported.
    all_types = {}
    for cu in dwarf_info.compilation_units():
        for type_info in cu.types():
            all_types[type_info.name] = type_info

    # Analyze structure types
    # NOTE(review): STRUCT is not among the TYPE_KIND values listed in this
    # document; confirm the member name against the installed LIEF version.
    print("Data structures:")
    for name, type_info in all_types.items():
        if type_info.kind == DWARF.TYPE_KIND.STRUCT:
            print(f" struct {name} (size: {type_info.size})")

    # Analyze global variables
    print("\nGlobal variables:")
    for cu in dwarf_info.compilation_units():
        for var in cu.variables():
            # `address` is Optional[int]; test against None so a variable
            # located at address 0 is not silently skipped.
            if var.address is not None:
                type_name = var.type.name if var.type else "unknown"
                print(f" {var.name}: {type_name} @ 0x{var.address:x}")

# Analyze call stacks and function relationships using debug information.
def analyze_call_stack(binary, addresses):
    """
    Analyze call stack using debug information.

    Prints one line per frame with its source location and, for DWARF
    binaries, the name of the first function whose address range contains
    the frame address.

    Args:
        binary: Binary with debug information
        addresses: List of return addresses from stack trace

    Returns:
        None. All results are printed.
    """
    print("Call stack analysis:")

    # Load the DWARF data once up front instead of once per resolved frame.
    debug_info = binary.debug_info
    dwarf_info = None
    if debug_info and debug_info.format == lief.DebugInfo.FORMAT.DWARF:
        dwarf_info = DWARF.load(binary)

    for i, addr in enumerate(addresses):
        location = addr_to_line(binary, addr)
        if not location:
            print(f" #{i}: 0x{addr:x} (no debug info)")
            continue

        print(f" #{i}: 0x{addr:x} in {location.file}:{location.line}")
        if dwarf_info is None:
            continue

        # Find function name. next() stops at the first match across ALL
        # compilation units; a `break` would only leave the inner loop.
        # NOTE(review): upper bound kept inclusive as in the original;
        # confirm whether range_t.high is an exclusive end address.
        func_name = next(
            (
                func.name
                for cu in dwarf_info.compilation_units()
                for func in cu.functions()
                if func.address.low <= addr <= func.address.high
            ),
            None,
        )
        if func_name is not None:
            print(f" Function: {func_name}")

# Extract and export debug information for external analysis tools.
def extract_debug_info(binary_path, output_format="json"):
    """
    Extract debug information from binary.

    Args:
        binary_path: Path to binary file
        output_format: "json" returns a JSON string. Any other value
            returns the collected data as a plain dict; no "xml" or
            "text" serializer is implemented here.

    Returns:
        Debug information as a JSON string or dict, or None when the
        binary cannot be parsed or carries no debug information.
    """
    binary = lief.parse(binary_path)
    # lief.parse returns None for input it cannot parse.
    if binary is None or not binary.debug_info:
        return None

    debug_data = {
        "format": str(binary.debug_info.format),
        "compilation_units": [],
        "functions": [],
        "variables": [],
        "types": [],
    }

    if binary.debug_info.format == lief.DebugInfo.FORMAT.DWARF:
        dwarf_info = DWARF.load(binary)
        for cu in dwarf_info.compilation_units():
            debug_data["compilation_units"].append({
                "name": cu.name,
                "language": str(cu.language),
                "producer": cu.producer,
                "address_range": [cu.low_address, cu.high_address],
            })

            # Extract functions
            debug_data["functions"].extend(
                {
                    "name": func.name,
                    "address": [func.address.low, func.address.high],
                    "file": func.file,
                    "line": func.line,
                }
                for func in cu.functions()
            )

            # Extract variables and types so the "variables" and "types"
            # keys declared above are actually populated.
            debug_data["variables"].extend(
                {
                    "name": var.name,
                    "address": var.address,
                    "file": var.file,
                    "line": var.line,
                }
                for var in cu.variables()
            )
            debug_data["types"].extend(
                {
                    "name": type_info.name,
                    "size": type_info.size,
                    "kind": str(type_info.kind),
                }
                for type_info in cu.types()
            )

    if output_format == "json":
        import json
        return json.dumps(debug_data, indent=2)
    return debug_data
# Usage
debug_json = extract_debug_info("/usr/bin/program", "json")
if debug_json:
with open("debug_info.json", "w") as f:
f.write(debug_json)

class DebugInfo:
format: FORMAT
enum FORMAT:
UNKNOWN = 0
DWARF = 1
PDB = 2
class debug_location_t:
line: int
file: str
# DWARF-specific types
class range_t:
low: int
high: int
size: int
enum Scope:
GLOBAL = 0
LOCAL = 1
PARAMETER = 2
class Parameter:
name: str
type: Optional[Type]
# PDB-specific types
class GUID:
data1: int
data2: int
data3: int
data4: bytes
# Common debug information interfaces
class SourceFile:
path: str
directory: str
class LineEntry:
address: int
file: SourceFile
line: int
column: int
class InlineInfo:
call_file: SourceFile
call_line: int
call_column: int
callee: Function

Install with Tessl CLI
npx tessl i tessl/pypi-lief