CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-lief

Library to instrument executable formats including ELF, PE, Mach-O, and Android formats

Pending
Overview
Eval results
Files

docs/assembly-engine.md

Assembly Engine

Integrated disassembly and assembly engine supporting multiple architectures for code analysis and binary modification. The assembly engine provides unified interfaces for disassembling machine code and assembling instructions across different CPU architectures.

Capabilities

Instruction Disassembly

Disassemble machine code into human-readable assembly instructions with detailed metadata.

# Access through lief.assembly module
import lief.assembly as Assembly

class Engine:
    # Standalone engine covering both directions: machine-code bytes ->
    # instructions (disassemble) and assembly text -> machine-code bytes (assemble).
    # `address` defaults to 0; presumably the address given to the first
    # instruction — confirm against the library.
    def disassemble(self, data: bytes, address: int = 0) -> Iterator[Instruction]
    def assemble(self, code: str, address: int = 0) -> bytes
    
class Instruction:
    # A single disassembled instruction, as yielded by the disassembly APIs.
    address: int  # the usage examples print this in hex
    size: int  # the patching example sums this and compares it with len() of assembled bytes
    mnemonic: str  # the examples test it for "ret" and for "str"/"ldr"/"b" prefixes
    raw: bytes  # the examples print it with .hex()
    operands: List[Operand]
    
    # Textual rendering of the instruction; `with_address` defaults to True.
    def to_string(self, with_address: bool = True) -> str
    # Boolean classification predicates.
    def is_call(self) -> bool
    def is_branch(self) -> bool
    def is_terminator(self) -> bool
    
class Operand:
    # Base operand type; the only member listed here is its string rendering.
    def to_string(self) -> str

class MemoryAccess(enum.Flag):
    """Bit flags for an instruction's memory access; members combine with ``|``."""

    NONE = 0
    READ = 1 << 0
    WRITE = 1 << 1

# Disassembly methods available on Binary objects
# The iterators are typed Optional[Instruction]: an entry can be None, which is
# why the usage examples guard every item with `if instruction:`.
# Disassemble starting at `address`; `size` is optional (the examples pass 64,
# presumably a byte count — confirm).
def disassemble(self, address: int, size: int = None) -> Iterator[Optional[Instruction]]
# Disassemble a function selected by name (the examples pass "main").
def disassemble(self, function_name: str) -> Iterator[Optional[Instruction]]
# Disassemble a caller-supplied buffer; `address` defaults to 0.
def disassemble_from_bytes(self, buffer: bytes, address: int = 0) -> Iterator[Optional[Instruction]]
# Assemble `assembly` text for `address` and return the machine code bytes.
def assemble(self, address: int, assembly: str) -> bytes

Usage example:

import lief

binary = lief.parse("/bin/ls")

# Walk 64 units of code starting at the entry point
entry = binary.entrypoint
print(f"Disassembling at entry point: 0x{entry:x}")
for insn in binary.disassemble(entry, 64):
    if not insn:
        continue  # skip empty (falsy) entries from the iterator
    print(f"0x{insn.address:08x}: {insn.mnemonic}")
    print(f"  Raw bytes: {insn.raw.hex()}")
    print(f"  Size: {insn.size}")

# Select a function by symbol name and dump it
if binary.has_symbol("main"):
    print("\nDisassembling main function:")
    for insn in binary.disassemble("main"):
        if insn:
            print(insn.to_string())

# Decode a standalone byte string, reported as if located at 0x1000
machine_code = b"\x48\x89\xe5\x48\x83\xec\x10"  # x86-64 function prologue
print("\nDisassembling raw bytes:")
for insn in binary.disassemble_from_bytes(machine_code, 0x1000):
    if insn:
        print(f"0x{insn.address:x}: {insn.mnemonic}")

Code Assembly

Assemble assembly instructions into machine code for binary patching and modification.

def assemble(self, address: int, assembly: str) -> bytes:
    """
    Turn assembly source text into machine code.

    Args:
        address: Address the assembled code is targeted at.
        assembly: Assembly source; the usage examples pass both a single
            instruction and multi-line strings containing labels.

    Returns:
        The encoded machine code as bytes.
    """

Usage example:

binary = lief.parse("/bin/test")

# One instruction
nop_bytes = binary.assemble(0x1000, "nop")
print(f"NOP instruction: {nop_bytes.hex()}")

# Several instructions in a single call
prologue_source = """
    push rbp
    mov rbp, rsp
    sub rsp, 16
"""
function_prologue = binary.assemble(0x2000, prologue_source)
print(f"Function prologue: {function_prologue.hex()}")

# A sequence that jumps to a local label
conditional_source = """
    cmp eax, 0
    je end
    mov ebx, 1
    end:
    ret
"""
conditional_code = binary.assemble(0x3000, conditional_source)
print(f"Conditional code: {conditional_code.hex()}")

Architecture Support

Support for multiple CPU architectures with architecture-specific instruction handling.

# Architecture-specific modules
import lief.assembly.aarch64 as AArch64
import lief.assembly.x86 as x86
import lief.assembly.arm as ARM
import lief.assembly.mips as MIPS
import lief.assembly.riscv as RISCV
import lief.assembly.powerpc as PowerPC
import lief.assembly.ebpf as eBPF

# AArch64 Architecture
class AArch64:
    # Schematic listing of the AArch64-specific types. It is notation, not
    # runnable Python: `enum REGISTERS:` below is shorthand for an enumeration.
    class Instruction(Assembly.Instruction):
        operands: List[Operand]
        
    # Common base of the operand kinds listed below.
    class Operand(Assembly.Operand):
        pass
        
    # Register operand.
    class Register(Operand):
        reg: REGISTERS
        
    # Immediate (constant) operand.
    class Immediate(Operand):
        value: int
        
    # Memory operand: a base register plus an integer offset.
    class Memory(Operand):
        base: Register
        offset: int
        
    # PC-relative operand (presumably an offset from the program counter — confirm).
    class PCRelative(Operand):
        value: int
        
    # Register identifiers; only a few entries are listed.
    enum REGISTERS:
        X0 = 0
        X1 = 1
        # ... more registers
        SP = 31
        XZR = 32
        
# x86/x86-64 Architecture
class x86:
    # Schematic listing of the x86/x86-64-specific types. It is notation, not
    # runnable Python: `enum REGISTERS:` below is shorthand for an enumeration.
    class Instruction(Assembly.Instruction):
        operands: List[Operand]
        
    # Common base of the operand kinds listed below.
    class Operand(Assembly.Operand):
        pass
        
    # Register operand.
    class Register(Operand):
        reg: REGISTERS
        
    # Immediate (constant) operand.
    class Immediate(Operand):
        value: int
        
    # Memory operand; base and index registers are optional.
    # Presumably addresses base + index * scale + displacement — confirm.
    class Memory(Operand):
        base: Optional[Register]
        index: Optional[Register]
        scale: int
        displacement: int
        
    # Register identifiers; only a few entries are listed.
    enum REGISTERS:
        EAX = 0
        ECX = 1
        EDX = 2
        EBX = 3
        ESP = 4
        EBP = 5
        ESI = 6
        EDI = 7
        # x86-64 extended registers
        R8 = 8
        R9 = 9
        # ... more registers

class Engine:
    """Base disassembly engine class."""
    # Same two signatures as the Engine listing under "Instruction Disassembly".
    # The standalone-engine example calls both with an explicit address of 0x1000.
    def disassemble(self, data: bytes, address: int = 0) -> Iterator[Instruction]
    def assemble(self, code: str, address: int = 0) -> bytes

x86/x86-64 Support

Intel x86 and AMD64 architecture support with full instruction set coverage.

# x86-specific features available through lief.assembly.x86
# Supports:
# - 16-bit, 32-bit, and 64-bit modes
# - SSE/AVX vector instructions  
# - System instructions
# - FPU instructions
# - Modern extensions (BMI, etc.)

Usage example:

import lief
import lief.assembly as Assembly

binary = lief.parse("/bin/ls")  # x86-64 binary

# Classify each instruction found at the entry point
for insn in binary.disassemble(binary.entrypoint, 64):
    if not insn:
        continue  # skip empty (falsy) entries from the iterator

    print(f"{insn.to_string()}")

    # Report only the first category that matches
    if insn.is_call():
        print("  -> CALL instruction")
    elif insn.is_branch():
        print("  -> BRANCH instruction")
    elif insn.is_terminator():
        print("  -> TERMINATOR instruction")

    # List every operand with its position
    for position, operand in enumerate(insn.operands):
        print(f"  Operand {position}: {operand.to_string()}")

    # Memory access flags are only inspected when the attribute exists
    if hasattr(insn, 'memory_access'):
        if insn.memory_access & Assembly.MemoryAccess.READ:
            print("  -> Reads memory")
        if insn.memory_access & Assembly.MemoryAccess.WRITE:
            print("  -> Writes memory")

# The engine can also be used without a parsed binary
engine = Assembly.Engine()
code_bytes = b'\x48\x89\xe5'  # mov rbp, rsp (x86-64)
instructions = list(engine.disassemble(code_bytes, 0x1000))
for decoded in instructions:
    print(f"0x{decoded.address:08x}: {decoded.mnemonic}")

# Assembling through the same standalone engine
machine_code = engine.assemble("push ebp\nmov ebp, esp", 0x1000)
print(f"Assembled: {machine_code.hex()}")

ARM/AArch64 Support

ARM 32-bit and 64-bit architecture support including Thumb mode.

# ARM-specific features available through lief.assembly.arm and lief.assembly.aarch64
# Supports:
# - ARM32 (ARM mode and Thumb mode)
# - AArch64 (64-bit ARM)
# - NEON vector instructions
# - Cryptographic extensions
# - System registers

Usage example:

# Inspect the entry point of an ARM64 binary
arm_binary = lief.parse("/system/bin/app_process64")  # Android ARM64

for insn in arm_binary.disassemble(arm_binary.entrypoint, 64):
    if not insn:
        continue  # skip empty (falsy) entries from the iterator

    name = insn.mnemonic
    print(f"0x{insn.address:x}: {name}")

    # Rough classification by mnemonic prefix
    if name.startswith(("str", "ldr")):
        print("  -> Memory access instruction")
    elif name.startswith("b"):
        print("  -> Branch instruction")

RISC-V Support

RISC-V architecture support for the emerging open-source instruction set.

# RISC-V features available through lief.assembly.riscv
# Supports:
# - RV32I/RV64I base instruction sets
# - Standard extensions (M, A, F, D, C)
# - Privileged instructions
# - Custom extensions

MIPS Support

MIPS architecture support for embedded and networking systems.

# MIPS features available through lief.assembly.mips  
# Supports:
# - MIPS32/MIPS64
# - Big-endian and little-endian
# - Delay slots
# - Coprocessor instructions

PowerPC Support

PowerPC architecture support for legacy and embedded systems.

# PowerPC features available through lief.assembly.powerpc
# Supports:  
# - PowerPC 32-bit and 64-bit
# - Vector instructions (AltiVec)
# - System instructions

eBPF Support

Extended Berkeley Packet Filter support for kernel and networking analysis.

# eBPF features available through lief.assembly.ebpf
# Supports:
# - eBPF instruction set
# - Kernel helper functions
# - Map operations
# - System call analysis

Advanced Disassembly Features

Enhanced disassembly capabilities for detailed code analysis.

class Instruction:
    # NOTE(review): this listing names `is_jump`/`is_conditional` and shows
    # `memory_access`/`operands` as methods, whereas the listing under
    # "Instruction Disassembly" names `is_branch` and shows `operands` as an
    # attribute. Confirm which spelling the installed LIEF version exposes.
    def is_call(self) -> bool:
        """Check if instruction is a function call."""
        
    def is_jump(self) -> bool:
        """Check if instruction is a jump/branch."""
        
    def is_conditional(self) -> bool:
        """Check if instruction is conditional."""
        
    def is_terminator(self) -> bool:
        """Check if instruction terminates basic block."""
        
    def memory_access(self) -> MemoryAccess:
        """Get memory access type (read/write/none)."""
        
    def operands(self) -> List[Operand]:
        """Get instruction operands."""

Usage example:

import lief
# MemoryAccess lives in the lief.assembly module; without this import the
# bare name used below raises NameError.
from lief.assembly import MemoryAccess

binary = lief.parse("/usr/bin/gcc")

# Advanced instruction analysis
for instruction in binary.disassemble("main"):
    if instruction:
        print(f"{instruction.to_string()}")
        
        # Analyze instruction properties (only the first matching category is reported)
        if instruction.is_call():
            print("  -> Function call")
        elif instruction.is_jump():
            if instruction.is_conditional():
                print("  -> Conditional branch")
            else:
                print("  -> Unconditional jump")
        elif instruction.is_terminator():
            print("  -> Basic block terminator")
            
        # Check memory access; the two flags are tested independently, so an
        # instruction can be reported as both reading and writing
        access = instruction.memory_access()
        if access & MemoryAccess.READ:
            print("  -> Reads memory")
        if access & MemoryAccess.WRITE:
            print("  -> Writes memory")

Control Flow Analysis

Analyze control flow patterns and basic block structure.

def analyze_control_flow(binary, start_address, max_instructions=1000):
    """
    Split the code starting at ``start_address`` into basic blocks.

    ``max_instructions * 4`` is passed to ``binary.disassemble`` as the size
    to decode, and at most ``max_instructions`` instructions are consumed.
    A block is closed after each instruction for which ``is_terminator()``
    is true; calls do not close a block. No control flow graph is built,
    only the list of blocks.

    Args:
        binary: Object exposing ``disassemble(address, size)``.
        start_address: Address at which disassembly starts.
        max_instructions: Upper bound on the number of instructions analysed.

    Returns:
        A list of basic blocks, each a non-empty list of instructions in
        address order. Instructions after the last terminator are returned
        as a final, unterminated block instead of being dropped.
    """
    basic_blocks = []
    current_block = []
    consumed = 0

    for instruction in binary.disassemble(start_address, max_instructions * 4):
        # The size argument alone does not bound the instruction count
        # (instructions may be shorter than 4 bytes), so enforce it here.
        if consumed >= max_instructions:
            break
        if not instruction:
            continue  # the iterator may yield empty entries
        consumed += 1

        current_block.append(instruction)
        if instruction.is_terminator():
            basic_blocks.append(current_block)
            current_block = []

    # Keep whatever was decoded after the last terminator.
    if current_block:
        basic_blocks.append(current_block)

    return basic_blocks

Usage example:

def analyze_function_flow(binary, function_name):
    """Print each basic block of ``function_name`` and how the block ends.

    Prints a "not found" message and returns early when the binary has no
    symbol with that name. Always returns None.
    """
    if not binary.has_symbol(function_name):
        print(f"Function {function_name} not found")
        return

    print(f"Analyzing control flow for {function_name}:")

    entry = binary.get_function_address(function_name)
    for index, block in enumerate(analyze_control_flow(binary, entry)):
        print(f"\nBasic Block {index}:")
        for insn in block:
            print(f"  {insn.to_string()}")

        # Describe the block by its final instruction; nothing is printed
        # when none of the categories match.
        tail = block[-1]
        if tail.is_call():
            print("  -> Ends with function call")
        elif tail.is_jump():
            print(
                "  -> Ends with conditional branch"
                if tail.is_conditional()
                else "  -> Ends with unconditional jump"
            )
        elif "ret" in tail.mnemonic:
            print("  -> Function return")

# Usage
# Parse the binary, then print the basic blocks of its "main" function.
binary = lief.parse("/bin/bash")
analyze_function_flow(binary, "main")

Binary Modification with Assembly

Combine disassembly and assembly for binary modification workflows.

def patch_function_with_assembly(binary, function_name, new_assembly):
    """
    Overwrite a function's code with freshly assembled instructions.

    Only the interface is shown here; the body is not part of this listing.

    Args:
        binary: Parsed LIEF binary to modify.
        function_name: Symbol name of the function to replace.
        new_assembly: Replacement assembly source as a string.

    Returns:
        Success status together with information about the patch.
    """

Usage example:

def patch_binary_function(binary_path, function_name, new_code):
    """Patch a function in a binary with new assembly code.

    The original size is measured by disassembling the function up to and
    including the first instruction whose mnemonic contains "ret".
    ``new_code`` is assembled at the function's address and written in place
    only when it fits; any leftover bytes are overwritten with NOPs. The
    modified binary is not written to disk by this function.

    Args:
        binary_path: Path handed to ``lief.parse``.
        function_name: Symbol name of the function to overwrite.
        new_code: Replacement assembly source.

    Returns:
        True when the patch was applied; False when the binary could not be
        parsed, the symbol is missing, or the new code is larger than the
        measured original.
    """
    
    binary = lief.parse(binary_path)
    if not binary:
        return False
        
    # Find target function
    if not binary.has_symbol(function_name):
        print(f"Function {function_name} not found")
        return False
        
    func_addr = binary.get_function_address(function_name)
    print(f"Found {function_name} at 0x{func_addr:x}")
    
    # Disassemble original function
    print("Original code:")
    original_size = 0
    for instruction in binary.disassemble(function_name):
        if instruction:
            print(f"  {instruction.to_string()}")
            original_size += instruction.size
            
            # Stop at return instruction
            if "ret" in instruction.mnemonic:
                break
    
    # Assemble new code
    new_machine_code = binary.assemble(func_addr, new_code)
    new_size = len(new_machine_code)
    print(f"\nNew machine code: {new_machine_code.hex()}")
    print(f"Original size: {original_size}, New size: {new_size}")
    
    # Refuse to write past the measured end of the original function
    if new_size > original_size:
        print("New code too large for available space")
        return False

    binary.patch_address(func_addr, new_machine_code)

    # Pad with NOPs if needed
    padding = original_size - new_size
    if padding > 0:
        pad_addr = func_addr + new_size
        # One "nop" per line: "nop" * padding would produce "nopnopnop...",
        # which is not valid assembly. One NOP per leftover byte assumes a
        # 1-byte NOP encoding (as on x86) — confirm for other targets.
        nop_source = "\n".join(["nop"] * padding)
        nop_bytes = binary.assemble(pad_addr, nop_source)
        binary.patch_address(pad_addr, nop_bytes)

    print("Patch applied successfully")
    return True

# Usage
# Replacement body: load the constant 42 into eax and return
new_function_code = (
    "\n"
    "    mov eax, 42\n"
    "    ret\n"
)

success = patch_binary_function(
    "/tmp/test_binary",
    "get_value",
    new_function_code,
)
if success:
    print("Binary patching completed")

Types

class Engine:
    """Base class of the disassembly engine; no members are listed here."""

class Instruction:
    # Summary of the instruction interface used by the examples on this page.
    # NOTE(review): the listing under "Instruction Disassembly" names
    # `is_branch` and an `operands` attribute instead of `is_jump` /
    # `is_conditional` — confirm which set the installed LIEF version exposes.
    address: int
    size: int
    mnemonic: str  
    raw: bytes
    
    # Textual rendering; `with_address` defaults to True.
    def to_string(self, with_address: bool = True) -> str
    # Boolean classification predicates.
    def is_call(self) -> bool
    def is_jump(self) -> bool
    def is_conditional(self) -> bool
    def is_terminator(self) -> bool
    # Memory access flags of the instruction (see MemoryAccess).
    def memory_access(self) -> MemoryAccess

class MemoryAccess(enum.Flag):
    """Memory access flags of an instruction.

    Declared with ``class`` (``enum Name(...)`` is not Python syntax), which
    matches the declaration under "Instruction Disassembly". Being an
    ``enum.Flag``, members can be combined with ``|`` and tested with ``&``.
    """

    NONE = 0
    READ = 1
    WRITE = 2

class Operand:
    """Instruction operand representation."""
    # NOTE(review): the Operand listing under "Instruction Disassembly" shows
    # only `to_string()`; confirm these fields against the installed LIEF version.
    type: OperandType  # operand kind, one of the OperandType members
    value: Union[int, str]  # typed as either an integer or a string
    size: int

class OperandType(enum.Enum):
    """Kinds of instruction operand.

    Declared with ``class`` and ``enum.Enum`` because ``enum Name:`` is not
    Python syntax; member names and values are unchanged.
    """

    REGISTER = 1
    IMMEDIATE = 2
    MEMORY = 3
    DISPLACEMENT = 4

# Architecture-specific instruction extensions would be available
# through the respective architecture modules:
# - lief.assembly.x86
# - lief.assembly.aarch64  
# - lief.assembly.arm
# - lief.assembly.mips
# - lief.assembly.powerpc
# - lief.assembly.riscv
# - lief.assembly.ebpf

Install with Tessl CLI

npx tessl i tessl/pypi-lief

docs

android-formats.md

assembly-engine.md

core-operations.md

debug-info.md

elf-format.md

extended-features.md

index.md

macho-format.md

pe-format.md

tile.json