# Assembly Engine

LIEF's integrated disassembly and assembly engine supports multiple architectures for code analysis and binary modification. It provides a unified interface for disassembling machine code and assembling instructions across different CPU architectures.
3
4
## Capabilities
5
6
### Instruction Disassembly
7
8
Disassemble machine code into human-readable assembly instructions with detailed metadata.
9
10
```python { .api }
11
# Access through lief.assembly module
12
import lief.assembly as Assembly
13
14
class Engine:
15
def disassemble(self, data: bytes, address: int = 0) -> Iterator[Instruction]
16
def assemble(self, code: str, address: int = 0) -> bytes
17
18
class Instruction:
19
address: int
20
size: int
21
mnemonic: str
22
raw: bytes
23
operands: List[Operand]
24
25
def to_string(self, with_address: bool = True) -> str
26
def is_call(self) -> bool
27
def is_branch(self) -> bool
28
def is_terminator(self) -> bool
29
30
class Operand:
31
def to_string(self) -> str
32
33
class MemoryAccess(enum.Flag):
34
NONE = 0
35
READ = 1
36
WRITE = 2
37
38
# Disassembly methods available on Binary objects
39
def disassemble(self, address: int, size: int = None) -> Iterator[Optional[Instruction]]
40
def disassemble(self, function_name: str) -> Iterator[Optional[Instruction]]
41
def disassemble_from_bytes(self, buffer: bytes, address: int = 0) -> Iterator[Optional[Instruction]]
42
def assemble(self, address: int, assembly: str) -> bytes
43
```
44
45
Usage example:
46
```python
47
import lief
48
49
binary = lief.parse("/bin/ls")
50
51
# Disassemble at entry point
52
print(f"Disassembling at entry point: 0x{binary.entrypoint:x}")
53
for instruction in binary.disassemble(binary.entrypoint, 64):
54
if instruction:
55
print(f"0x{instruction.address:08x}: {instruction.mnemonic}")
56
print(f" Raw bytes: {instruction.raw.hex()}")
57
print(f" Size: {instruction.size}")
58
59
# Disassemble specific function
60
if binary.has_symbol("main"):
61
print("\nDisassembling main function:")
62
for instruction in binary.disassemble("main"):
63
if instruction:
64
print(instruction.to_string())
65
66
# Disassemble raw bytes
67
machine_code = b"\x48\x89\xe5\x48\x83\xec\x10" # x86-64 function prologue
68
print("\nDisassembling raw bytes:")
69
for instruction in binary.disassemble_from_bytes(machine_code, 0x1000):
70
if instruction:
71
print(f"0x{instruction.address:x}: {instruction.mnemonic}")
72
```
73
74
### Code Assembly
75
76
Assemble assembly instructions into machine code for binary patching and modification.
77
78
```python { .api }
79
def assemble(self, address: int, assembly: str) -> bytes:
80
"""
81
Assemble assembly instructions into machine code.
82
83
Args:
84
address: Target address for assembled code
85
assembly: Assembly instructions as string
86
87
Returns:
88
Machine code bytes
89
"""
90
```
91
92
Usage example:
93
```python
94
binary = lief.parse("/bin/test")
95
96
# Assemble single instruction
97
nop_bytes = binary.assemble(0x1000, "nop")
98
print(f"NOP instruction: {nop_bytes.hex()}")
99
100
# Assemble multiple instructions
101
function_prologue = binary.assemble(0x2000, """
102
push rbp
103
mov rbp, rsp
104
sub rsp, 16
105
""")
106
print(f"Function prologue: {function_prologue.hex()}")
107
108
# Assemble with jumps
109
conditional_code = binary.assemble(0x3000, """
110
cmp eax, 0
111
je end
112
mov ebx, 1
113
end:
114
ret
115
""")
116
print(f"Conditional code: {conditional_code.hex()}")
117
```
118
119
### Architecture Support
120
121
Support for multiple CPU architectures with architecture-specific instruction handling.
122
123
```python { .api }
124
# Architecture-specific modules
125
import lief.assembly.aarch64 as AArch64
126
import lief.assembly.x86 as x86
127
import lief.assembly.arm as ARM
128
import lief.assembly.mips as MIPS
129
import lief.assembly.riscv as RISCV
130
import lief.assembly.powerpc as PowerPC
131
import lief.assembly.ebpf as eBPF
132
133
# AArch64 Architecture
134
class AArch64:
    """AArch64-specific instruction and operand types."""

    class Instruction(Assembly.Instruction):
        operands: List[Operand]

    class Operand(Assembly.Operand):
        pass

    class Register(Operand):
        reg: REGISTERS

    class Immediate(Operand):
        value: int

    class Memory(Operand):
        base: Register
        offset: int

    class PCRelative(Operand):
        value: int

    # Declared as a regular enum class: a bare `enum NAME:` statement is not
    # valid Python syntax.
    class REGISTERS(enum.Enum):
        X0 = 0
        X1 = 1
        # ... more registers
        SP = 31
        XZR = 32
160
161
# x86/x86-64 Architecture
162
class x86:
    """x86/x86-64-specific instruction and operand types."""

    class Instruction(Assembly.Instruction):
        operands: List[Operand]

    class Operand(Assembly.Operand):
        pass

    class Register(Operand):
        reg: REGISTERS

    class Immediate(Operand):
        value: int

    class Memory(Operand):
        # base and index are optional; scale and displacement are plain ints.
        base: Optional[Register]
        index: Optional[Register]
        scale: int
        displacement: int

    # Declared as a regular enum class: a bare `enum NAME:` statement is not
    # valid Python syntax.
    class REGISTERS(enum.Enum):
        EAX = 0
        ECX = 1
        EDX = 2
        EBX = 3
        ESP = 4
        EBP = 5
        ESI = 6
        EDI = 7
        # x86-64 extended registers
        R8 = 8
        R9 = 9
        # ... more registers
194
195
class Engine:
196
"""Base disassembly engine class."""
197
def disassemble(self, data: bytes, address: int = 0) -> Iterator[Instruction]
198
def assemble(self, code: str, address: int = 0) -> bytes
199
```
200
201
#### x86/x86-64 Support
202
203
Intel x86 and AMD64 architecture support with full instruction set coverage.
204
205
```python { .api }
206
# x86-specific features available through lief.assembly.x86
207
# Supports:
208
# - 16-bit, 32-bit, and 64-bit modes
209
# - SSE/AVX vector instructions
210
# - System instructions
211
# - FPU instructions
212
# - Modern extensions (BMI, etc.)
213
```
214
215
Usage example:
216
```python
217
import lief
218
import lief.assembly as Assembly
219
220
binary = lief.parse("/bin/ls") # x86-64 binary
221
222
# Disassemble with enhanced instruction analysis
223
for instruction in binary.disassemble(binary.entrypoint, 64):
224
if instruction:
225
print(f"{instruction.to_string()}")
226
227
# Enhanced instruction type checking
228
if instruction.is_call():
229
print(" -> CALL instruction")
230
elif instruction.is_branch():
231
print(" -> BRANCH instruction")
232
elif instruction.is_terminator():
233
print(" -> TERMINATOR instruction")
234
235
# Print operands with details
236
for i, operand in enumerate(instruction.operands):
237
print(f" Operand {i}: {operand.to_string()}")
238
239
# Check memory access patterns
240
if hasattr(instruction, 'memory_access'):
241
if instruction.memory_access & Assembly.MemoryAccess.READ:
242
print(" -> Reads memory")
243
if instruction.memory_access & Assembly.MemoryAccess.WRITE:
244
print(" -> Writes memory")
245
246
# Use standalone assembly engine
247
engine = Assembly.Engine()
248
code_bytes = b'\x48\x89\xe5' # mov rbp, rsp (x86-64)
249
instructions = list(engine.disassemble(code_bytes, 0x1000))
250
for instr in instructions:
251
print(f"0x{instr.address:08x}: {instr.mnemonic}")
252
253
# Assemble with standalone engine
254
machine_code = engine.assemble("push ebp\nmov ebp, esp", 0x1000)
255
print(f"Assembled: {machine_code.hex()}")
256
```
257
258
#### ARM/AArch64 Support
259
260
ARM 32-bit and 64-bit architecture support including Thumb mode.
261
262
```python { .api }
263
# ARM-specific features available through lief.assembly.arm and lief.assembly.aarch64
264
# Supports:
265
# - ARM32 (ARM mode and Thumb mode)
266
# - AArch64 (64-bit ARM)
267
# - NEON vector instructions
268
# - Cryptographic extensions
269
# - System registers
270
```
271
272
Usage example:
273
```python
274
# ARM64 binary analysis
275
arm_binary = lief.parse("/system/bin/app_process64") # Android ARM64
276
277
for instruction in arm_binary.disassemble(arm_binary.entrypoint, 64):
278
if instruction:
279
print(f"0x{instruction.address:x}: {instruction.mnemonic}")
280
281
# ARM64-specific instruction analysis
282
if instruction.mnemonic.startswith("str") or instruction.mnemonic.startswith("ldr"):
283
print(" -> Memory access instruction")
284
elif instruction.mnemonic.startswith("b"):
285
print(" -> Branch instruction")
286
```
287
288
#### RISC-V Support
289
290
RISC-V architecture support for the emerging open-source instruction set.
291
292
```python { .api }
293
# RISC-V features available through lief.assembly.riscv
294
# Supports:
295
# - RV32I/RV64I base instruction sets
296
# - Standard extensions (M, A, F, D, C)
297
# - Privileged instructions
298
# - Custom extensions
299
```
300
301
#### MIPS Support
302
303
MIPS architecture support for embedded and networking systems.
304
305
```python { .api }
306
# MIPS features available through lief.assembly.mips
307
# Supports:
308
# - MIPS32/MIPS64
309
# - Big-endian and little-endian
310
# - Delay slots
311
# - Coprocessor instructions
312
```
313
314
#### PowerPC Support
315
316
PowerPC architecture support for legacy and embedded systems.
317
318
```python { .api }
319
# PowerPC features available through lief.assembly.powerpc
320
# Supports:
321
# - PowerPC 32-bit and 64-bit
322
# - Vector instructions (AltiVec)
323
# - System instructions
324
```
325
326
#### eBPF Support
327
328
Extended Berkeley Packet Filter support for kernel and networking analysis.
329
330
```python { .api }
331
# eBPF features available through lief.assembly.ebpf
332
# Supports:
333
# - eBPF instruction set
334
# - Kernel helper functions
335
# - Map operations
336
# - System call analysis
337
```
338
339
### Advanced Disassembly Features
340
341
Enhanced disassembly capabilities for detailed code analysis.
342
343
```python { .api }
344
class Instruction:
345
def is_call(self) -> bool:
346
"""Check if instruction is a function call."""
347
348
def is_jump(self) -> bool:
349
"""Check if instruction is a jump/branch."""
350
351
def is_conditional(self) -> bool:
352
"""Check if instruction is conditional."""
353
354
def is_terminator(self) -> bool:
355
"""Check if instruction terminates basic block."""
356
357
def memory_access(self) -> MemoryAccess:
358
"""Get memory access type (read/write/none)."""
359
360
def operands(self) -> List[Operand]:
361
"""Get instruction operands."""
362
```
363
364
Usage example:
365
```python
366
binary = lief.parse("/usr/bin/gcc")
367
368
# Advanced instruction analysis
369
for instruction in binary.disassemble("main"):
370
if instruction:
371
print(f"{instruction.to_string()}")
372
373
# Analyze instruction properties
374
if instruction.is_call():
375
print(" -> Function call")
376
elif instruction.is_jump():
377
if instruction.is_conditional():
378
print(" -> Conditional branch")
379
else:
380
print(" -> Unconditional jump")
381
elif instruction.is_terminator():
382
print(" -> Basic block terminator")
383
384
# Check memory access
385
access = instruction.memory_access()
386
if access & MemoryAccess.READ:
387
print(" -> Reads memory")
388
if access & MemoryAccess.WRITE:
389
print(" -> Writes memory")
390
```
391
392
### Control Flow Analysis
393
394
Analyze control flow patterns and basic block structure.
395
396
```python { .api }
397
def analyze_control_flow(binary, start_address, max_instructions=1000):
    """
    Split a linear disassembly into basic blocks.

    Disassembles from ``start_address`` and closes a block after every
    instruction whose ``is_terminator()`` returns true. Only the list of
    basic blocks is produced; no control flow graph is built.

    Args:
        binary: Object exposing ``disassemble(address, size)``.
        start_address: Address at which disassembly starts.
        max_instructions: Sizes the disassembly window, which is
            ``max_instructions * 4`` bytes.

    Returns:
        List of basic blocks, each a non-empty list of instructions in the
        order they were yielded.
    """
    basic_blocks = []
    current_block = []

    for instruction in binary.disassemble(start_address, max_instructions * 4):
        if not instruction:
            # The iterator may yield empty (None) entries; skip them.
            continue

        current_block.append(instruction)

        # Only terminators close a block; calls stay inside the current block.
        if instruction.is_terminator():
            basic_blocks.append(current_block)
            current_block = []

    # Keep the instructions after the last terminator instead of silently
    # dropping them when the window ends mid-block.
    if current_block:
        basic_blocks.append(current_block)

    return basic_blocks
421
```
422
423
Usage example:
424
```python
425
def analyze_function_flow(binary, function_name):
    """Print each basic block of a named function and how the block ends."""
    symbol_missing = not binary.has_symbol(function_name)
    if symbol_missing:
        print(f"Function {function_name} not found")
        return

    print(f"Analyzing control flow for {function_name}:")

    entry_address = binary.get_function_address(function_name)
    block_list = analyze_control_flow(binary, entry_address)

    for index, instructions in enumerate(block_list):
        print(f"\nBasic Block {index}:")
        for insn in instructions:
            print(f" {insn.to_string()}")

        # Classify the block by its final instruction; unmatched endings
        # print nothing.
        tail = instructions[-1]
        if tail.is_call():
            ending = " -> Ends with function call"
        elif tail.is_jump():
            ending = (
                " -> Ends with conditional branch"
                if tail.is_conditional()
                else " -> Ends with unconditional jump"
            )
        elif "ret" in tail.mnemonic:
            ending = " -> Function return"
        else:
            ending = None

        if ending is not None:
            print(ending)
452
453
# Usage
454
binary = lief.parse("/bin/bash")
455
analyze_function_flow(binary, "main")
456
```
457
458
### Binary Modification with Assembly
459
460
Combine disassembly and assembly for binary modification workflows.
461
462
```python { .api }
463
def patch_function_with_assembly(binary, function_name, new_assembly):
464
"""
465
Replace function with new assembly code.
466
467
Args:
468
binary: LIEF binary object
469
function_name: Name of function to patch
470
new_assembly: New assembly code as string
471
472
Returns:
473
Success status and patch information
474
"""
475
```
476
477
Usage example:
478
```python
479
def patch_binary_function(binary_path, function_name, new_code, output_path=None):
    """Patch a function in a binary with new assembly code.

    The space available is measured by disassembling the function up to and
    including its first instruction whose mnemonic contains "ret". The new
    code must fit in that space; any remainder is filled with NOPs.

    Args:
        binary_path: Path of the binary to parse.
        function_name: Name of the function to overwrite.
        new_code: Replacement assembly source.
        output_path: Optional path to write the patched binary to. When
            omitted, the change is made only on the in-memory object.

    Returns:
        True if the patch was applied, False otherwise.
    """
    binary = lief.parse(binary_path)
    if not binary:
        return False

    # Find target function
    if not binary.has_symbol(function_name):
        print(f"Function {function_name} not found")
        return False

    func_addr = binary.get_function_address(function_name)
    print(f"Found {function_name} at 0x{func_addr:x}")

    # Disassemble original function to measure the space we may overwrite
    print("Original code:")
    original_size = 0
    for instruction in binary.disassemble(function_name):
        if not instruction:
            continue
        print(f" {instruction.to_string()}")
        original_size += instruction.size

        # Stop at return instruction
        if "ret" in instruction.mnemonic:
            break

    # Assemble new code
    new_machine_code = binary.assemble(func_addr, new_code)
    print(f"\nNew machine code: {new_machine_code.hex()}")
    print(f"Original size: {original_size}, New size: {len(new_machine_code)}")

    if len(new_machine_code) > original_size:
        print("New code too large for available space")
        return False

    # Build the NOP filler before touching the binary. A single NOP is
    # assembled and repeated: "nop" * n would yield "nopnopnop", which is not
    # valid assembly, and a NOP is not one byte long on every architecture.
    padding = original_size - len(new_machine_code)
    pad_addr = func_addr + len(new_machine_code)
    nop_bytes = b""
    if padding:
        single_nop = bytes(binary.assemble(pad_addr, "nop"))
        if not single_nop or padding % len(single_nop):
            print("Cannot fill remaining space with NOPs")
            return False
        nop_bytes = single_nop * (padding // len(single_nop))

    # Apply patch
    binary.patch_address(func_addr, new_machine_code)
    if nop_bytes:
        binary.patch_address(pad_addr, nop_bytes)

    # Write the result out only when the caller asked for it.
    if output_path is not None:
        binary.write(output_path)

    print("Patch applied successfully")
    return True
526
527
# Usage
528
new_function_code = """
529
mov eax, 42
530
ret
531
"""
532
533
success = patch_binary_function("/tmp/test_binary", "get_value", new_function_code)
534
if success:
535
print("Binary patching completed")
536
```
537
538
## Types
539
540
```python { .api }
541
class Engine:
542
"""Base disassembly engine."""
543
pass
544
545
class Instruction:
546
address: int
547
size: int
548
mnemonic: str
549
raw: bytes
550
551
def to_string(self, with_address: bool = True) -> str
552
def is_call(self) -> bool
553
def is_jump(self) -> bool
554
def is_conditional(self) -> bool
555
def is_terminator(self) -> bool
556
def memory_access(self) -> MemoryAccess
557
558
class MemoryAccess(enum.Flag):
    """Bit flags for an instruction's memory access type (read/write/none)."""

    NONE = 0
    READ = 1
    WRITE = 2
562
563
class Operand:
564
"""Instruction operand representation."""
565
type: OperandType
566
value: Union[int, str]
567
size: int
568
569
class OperandType(enum.Enum):
    """Kinds of instruction operand."""

    REGISTER = 1
    IMMEDIATE = 2
    MEMORY = 3
    DISPLACEMENT = 4
574
575
# Architecture-specific instruction extensions would be available
576
# through the respective architecture modules:
577
# - lief.assembly.x86
578
# - lief.assembly.aarch64
579
# - lief.assembly.arm
580
# - lief.assembly.mips
581
# - lief.assembly.powerpc
582
# - lief.assembly.riscv
583
# - lief.assembly.ebpf
584
```