Library to instrument executable formats including ELF, PE, Mach-O, and Android formats
—
Integrated disassembly and assembly engine supporting multiple architectures for code analysis and binary modification. The assembly engine provides unified interfaces for disassembling machine code and assembling instructions across different CPU architectures.
Disassemble machine code into human-readable assembly instructions with detailed metadata.
# Access through lief.assembly module
import lief.assembly as Assembly
class Engine:
def disassemble(self, data: bytes, address: int = 0) -> Iterator[Instruction]
def assemble(self, code: str, address: int = 0) -> bytes
class Instruction:
address: int
size: int
mnemonic: str
raw: bytes
operands: List[Operand]
def to_string(self, with_address: bool = True) -> str
def is_call(self) -> bool
def is_branch(self) -> bool
def is_terminator(self) -> bool
class Operand:
def to_string(self) -> str
class MemoryAccess(enum.Flag):
NONE = 0
READ = 1
WRITE = 2
# Disassembly methods available on Binary objects
def disassemble(self, address: int, size: int = None) -> Iterator[Optional[Instruction]]
def disassemble(self, function_name: str) -> Iterator[Optional[Instruction]]
def disassemble_from_bytes(self, buffer: bytes, address: int = 0) -> Iterator[Optional[Instruction]]
def assemble(self, address: int, assembly: str) -> bytes

Usage example:
import lief
binary = lief.parse("/bin/ls")
# Disassemble at entry point
print(f"Disassembling at entry point: 0x{binary.entrypoint:x}")
for instruction in binary.disassemble(binary.entrypoint, 64):
if instruction:
print(f"0x{instruction.address:08x}: {instruction.mnemonic}")
print(f" Raw bytes: {instruction.raw.hex()}")
print(f" Size: {instruction.size}")
# Disassemble specific function
if binary.has_symbol("main"):
print("\nDisassembling main function:")
for instruction in binary.disassemble("main"):
if instruction:
print(instruction.to_string())
# Disassemble raw bytes
machine_code = b"\x48\x89\xe5\x48\x83\xec\x10" # x86-64 function prologue
print("\nDisassembling raw bytes:")
for instruction in binary.disassemble_from_bytes(machine_code, 0x1000):
if instruction:
print(f"0x{instruction.address:x}: {instruction.mnemonic}")

Assemble assembly instructions into machine code for binary patching and modification.
def assemble(self, address: int, assembly: str) -> bytes:
"""
Assemble assembly instructions into machine code.
Args:
address: Target address for assembled code
assembly: Assembly instructions as string
Returns:
Machine code bytes
"""

Usage example:
binary = lief.parse("/bin/test")
# Assemble single instruction
nop_bytes = binary.assemble(0x1000, "nop")
print(f"NOP instruction: {nop_bytes.hex()}")
# Assemble multiple instructions
function_prologue = binary.assemble(0x2000, """
push rbp
mov rbp, rsp
sub rsp, 16
""")
print(f"Function prologue: {function_prologue.hex()}")
# Assemble with jumps
conditional_code = binary.assemble(0x3000, """
cmp eax, 0
je end
mov ebx, 1
end:
ret
""")
print(f"Conditional code: {conditional_code.hex()}")

Support for multiple CPU architectures with architecture-specific instruction handling.
# Architecture-specific modules
import lief.assembly.aarch64 as AArch64
import lief.assembly.x86 as x86
import lief.assembly.arm as ARM
import lief.assembly.mips as MIPS
import lief.assembly.riscv as RISCV
import lief.assembly.powerpc as PowerPC
import lief.assembly.ebpf as eBPF
# AArch64 Architecture
class AArch64:
class Instruction(Assembly.Instruction):
operands: List[Operand]
class Operand(Assembly.Operand):
pass
class Register(Operand):
reg: REGISTERS
class Immediate(Operand):
value: int
class Memory(Operand):
base: Register
offset: int
class PCRelative(Operand):
value: int
enum REGISTERS:
X0 = 0
X1 = 1
# ... more registers
SP = 31
XZR = 32
# x86/x86-64 Architecture
class x86:
class Instruction(Assembly.Instruction):
operands: List[Operand]
class Operand(Assembly.Operand):
pass
class Register(Operand):
reg: REGISTERS
class Immediate(Operand):
value: int
class Memory(Operand):
base: Optional[Register]
index: Optional[Register]
scale: int
displacement: int
enum REGISTERS:
EAX = 0
ECX = 1
EDX = 2
EBX = 3
ESP = 4
EBP = 5
ESI = 6
EDI = 7
# x86-64 extended registers
R8 = 8
R9 = 9
# ... more registers
class Engine:
"""Base disassembly engine class."""
def disassemble(self, data: bytes, address: int = 0) -> Iterator[Instruction]
def assemble(self, code: str, address: int = 0) -> bytes

Intel x86 and AMD64 architecture support with full instruction set coverage.
# x86-specific features available through lief.assembly.x86
# Supports:
# - 16-bit, 32-bit, and 64-bit modes
# - SSE/AVX vector instructions
# - System instructions
# - FPU instructions
# - Modern extensions (BMI, etc.)

Usage example:
import lief
import lief.assembly as Assembly
binary = lief.parse("/bin/ls") # x86-64 binary
# Disassemble with enhanced instruction analysis
for instruction in binary.disassemble(binary.entrypoint, 64):
if instruction:
print(f"{instruction.to_string()}")
# Enhanced instruction type checking
if instruction.is_call():
print(" -> CALL instruction")
elif instruction.is_branch():
print(" -> BRANCH instruction")
elif instruction.is_terminator():
print(" -> TERMINATOR instruction")
# Print operands with details
for i, operand in enumerate(instruction.operands):
print(f" Operand {i}: {operand.to_string()}")
# Check memory access patterns
if hasattr(instruction, 'memory_access'):
if instruction.memory_access & Assembly.MemoryAccess.READ:
print(" -> Reads memory")
if instruction.memory_access & Assembly.MemoryAccess.WRITE:
print(" -> Writes memory")
# Use standalone assembly engine
engine = Assembly.Engine()
code_bytes = b'\x48\x89\xe5' # mov rbp, rsp (x86-64)
instructions = list(engine.disassemble(code_bytes, 0x1000))
for instr in instructions:
print(f"0x{instr.address:08x}: {instr.mnemonic}")
# Assemble with standalone engine
machine_code = engine.assemble("push ebp\nmov ebp, esp", 0x1000)
print(f"Assembled: {machine_code.hex()}")

ARM 32-bit and 64-bit architecture support including Thumb mode.
# ARM-specific features available through lief.assembly.arm and lief.assembly.aarch64
# Supports:
# - ARM32 (ARM mode and Thumb mode)
# - AArch64 (64-bit ARM)
# - NEON vector instructions
# - Cryptographic extensions
# - System registers

Usage example:
# ARM64 binary analysis
arm_binary = lief.parse("/system/bin/app_process64") # Android ARM64
for instruction in arm_binary.disassemble(arm_binary.entrypoint, 64):
if instruction:
print(f"0x{instruction.address:x}: {instruction.mnemonic}")
# ARM64-specific instruction analysis
if instruction.mnemonic.startswith("str") or instruction.mnemonic.startswith("ldr"):
print(" -> Memory access instruction")
elif instruction.mnemonic.startswith("b"):
print(" -> Branch instruction")

RISC-V architecture support for the emerging open-source instruction set.
# RISC-V features available through lief.assembly.riscv
# Supports:
# - RV32I/RV64I base instruction sets
# - Standard extensions (M, A, F, D, C)
# - Privileged instructions
# - Custom extensions

MIPS architecture support for embedded and networking systems.
# MIPS features available through lief.assembly.mips
# Supports:
# - MIPS32/MIPS64
# - Big-endian and little-endian
# - Delay slots
# - Coprocessor instructions

PowerPC architecture support for legacy and embedded systems.
# PowerPC features available through lief.assembly.powerpc
# Supports:
# - PowerPC 32-bit and 64-bit
# - Vector instructions (AltiVec)
# - System instructions

Extended Berkeley Packet Filter (eBPF) support for kernel and networking analysis.
# eBPF features available through lief.assembly.ebpf
# Supports:
# - eBPF instruction set
# - Kernel helper functions
# - Map operations
# - System call analysis

Enhanced disassembly capabilities for detailed code analysis.
class Instruction:
def is_call(self) -> bool:
"""Check if instruction is a function call."""
def is_jump(self) -> bool:
"""Check if instruction is a jump/branch."""
def is_conditional(self) -> bool:
"""Check if instruction is conditional."""
def is_terminator(self) -> bool:
"""Check if instruction terminates basic block."""
def memory_access(self) -> MemoryAccess:
"""Get memory access type (read/write/none)."""
def operands(self) -> List[Operand]:
"""Get instruction operands."""

Usage example:
binary = lief.parse("/usr/bin/gcc")
# Advanced instruction analysis
for instruction in binary.disassemble("main"):
if instruction:
print(f"{instruction.to_string()}")
# Analyze instruction properties
if instruction.is_call():
print(" -> Function call")
elif instruction.is_jump():
if instruction.is_conditional():
print(" -> Conditional branch")
else:
print(" -> Unconditional jump")
elif instruction.is_terminator():
print(" -> Basic block terminator")
# Check memory access
access = instruction.memory_access()
if access & MemoryAccess.READ:
print(" -> Reads memory")
if access & MemoryAccess.WRITE:
print(" -> Writes memory")

Analyze control flow patterns and basic block structure.
def analyze_control_flow(binary, start_address, max_instructions=1000):
    """
    Split the code starting at ``start_address`` into basic blocks.

    Instructions are consumed in the order ``binary.disassemble`` yields them
    (branch targets are not followed) and a block is closed each time an
    instruction reports itself as a terminator. No control flow graph is
    built; only the list of blocks is returned.

    Args:
        binary: Object exposing ``disassemble(address, size)``
        start_address: Address at which disassembly starts
        max_instructions: Upper bound on the number of instructions collected

    Returns:
        List of basic blocks, each a non-empty list of instructions. The last
        block may be unterminated when decoding stops before a terminator is
        reached.
    """
    basic_blocks = []
    current_block = []
    collected = 0

    # The byte window is sized on the assumption of roughly 4 bytes per
    # instruction; on variable-length ISAs it can hold more instructions than
    # requested, so the count is enforced separately below.
    for instruction in binary.disassemble(start_address, max_instructions * 4):
        if collected >= max_instructions:
            break
        if not instruction:
            # Undecodable entries are skipped rather than stored.
            continue

        current_block.append(instruction)
        collected += 1

        # Only terminators close a block; calls normally return to the next
        # instruction, so they stay inside the current block.
        if instruction.is_terminator():
            basic_blocks.append(current_block)
            current_block = []

    # Keep the instructions decoded after the last terminator instead of
    # silently dropping them.
    if current_block:
        basic_blocks.append(current_block)

    return basic_blocks

# Usage example:
def analyze_function_flow(binary, function_name):
    """
    Print the basic blocks of a function and how each block ends.

    Args:
        binary: Object exposing ``has_symbol`` and ``get_function_address``
        function_name: Name of the function to analyze

    Returns:
        None. All results are written to standard output.
    """
    if not binary.has_symbol(function_name):
        print(f"Function {function_name} not found")
        return

    print(f"Analyzing control flow for {function_name}:")
    start_address = binary.get_function_address(function_name)
    # NOTE(review): LIEF appears to return an error value instead of an int
    # when the address cannot be resolved - confirm against the installed
    # version. A non-integer address cannot be disassembled from.
    if not isinstance(start_address, int):
        print(f"Function {function_name} not found")
        return

    blocks = analyze_control_flow(binary, start_address)
    for i, block in enumerate(blocks):
        if not block:
            # Nothing to print, and block[-1] below would raise IndexError.
            continue

        print(f"\nBasic Block {i}:")
        for instruction in block:
            print(f" {instruction.to_string()}")

        # Describe the block by its final instruction.
        last_instruction = block[-1]
        if last_instruction.is_call():
            print(" -> Ends with function call")
        elif last_instruction.is_jump():
            if last_instruction.is_conditional():
                print(" -> Ends with conditional branch")
            else:
                print(" -> Ends with unconditional jump")
        elif "ret" in last_instruction.mnemonic:
            print(" -> Function return")
# Usage
binary = lief.parse("/bin/bash")
analyze_function_flow(binary, "main")

Combine disassembly and assembly for binary modification workflows.
def patch_function_with_assembly(binary, function_name, new_assembly):
"""
Replace function with new assembly code.
Args:
binary: LIEF binary object
function_name: Name of function to patch
new_assembly: New assembly code as string
Returns:
Success status and patch information
"""

Usage example:
def patch_binary_function(binary_path, function_name, new_code):
    """
    Patch a function in a binary with new assembly code.

    The function is measured by disassembling it up to and including its
    first ``ret``-like instruction. The new code is written only if it fits
    in that space, and any leftover bytes are overwritten with NOPs. The
    binary is modified in memory only; this function does not write it back
    to disk.

    Args:
        binary_path: Path of the binary to parse
        function_name: Name of the function to patch
        new_code: Replacement assembly source

    Returns:
        True when the patch was applied, False when the binary could not be
        parsed, the function was not found, or the new code is too large.
    """
    binary = lief.parse(binary_path)
    if not binary:
        return False

    # Find target function
    if not binary.has_symbol(function_name):
        print(f"Function {function_name} not found")
        return False

    func_addr = binary.get_function_address(function_name)
    # A non-integer result cannot be formatted with ":x" or used as a patch
    # address, so treat it as "not found".
    if not isinstance(func_addr, int):
        print(f"Function {function_name} not found")
        return False
    print(f"Found {function_name} at 0x{func_addr:x}")

    # Disassemble original function to learn how many bytes may be replaced
    print("Original code:")
    original_size = 0
    for instruction in binary.disassemble(function_name):
        if not instruction:
            continue
        print(f" {instruction.to_string()}")
        original_size += instruction.size
        # Stop at return instruction
        if "ret" in instruction.mnemonic:
            break

    # Assemble new code
    new_machine_code = binary.assemble(func_addr, new_code)
    new_size = len(new_machine_code)
    print(f"\nNew machine code: {new_machine_code.hex()}")
    print(f"Original size: {original_size}, New size: {new_size}")

    if new_size > original_size:
        print("New code too large for available space")
        return False

    # Apply patch
    binary.patch_address(func_addr, new_machine_code)

    # Pad with NOPs if needed. Repeating the source text ("nop" * n) would
    # produce the single token "nopnop...", so one NOP is assembled and its
    # encoding is repeated instead. This also keeps the padding a whole
    # number of instructions when a NOP is wider than one byte.
    padding = original_size - new_size
    if padding > 0:
        pad_addr = func_addr + new_size
        nop_bytes = bytes(binary.assemble(pad_addr, "nop"))
        nop_count = padding // len(nop_bytes) if nop_bytes else 0
        if nop_count:
            binary.patch_address(pad_addr, nop_bytes * nop_count)

    print("Patch applied successfully")
    return True
# Usage
new_function_code = """
mov eax, 42
ret
"""
success = patch_binary_function("/tmp/test_binary", "get_value", new_function_code)
if success:
print("Binary patching completed")

class Engine:
"""Base disassembly engine."""
pass
class Instruction:
address: int
size: int
mnemonic: str
raw: bytes
def to_string(self, with_address: bool = True) -> str
def is_call(self) -> bool
def is_jump(self) -> bool
def is_conditional(self) -> bool
def is_terminator(self) -> bool
def memory_access(self) -> MemoryAccess
enum MemoryAccess(enum.Flag):
NONE = 0
READ = 1
WRITE = 2
class Operand:
"""Instruction operand representation."""
type: OperandType
value: Union[int, str]
size: int
enum OperandType:
REGISTER = 1
IMMEDIATE = 2
MEMORY = 3
DISPLACEMENT = 4
# Architecture-specific instruction extensions would be available
# through the respective architecture modules:
# - lief.assembly.x86
# - lief.assembly.aarch64
# - lief.assembly.arm
# - lief.assembly.mips
# - lief.assembly.powerpc
# - lief.assembly.riscv
# - lief.assembly.ebpf

Install with Tessl CLI
npx tessl i tessl/pypi-lief