or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

android-formats.md assembly-engine.md core-operations.md debug-info.md elf-format.md extended-features.md index.md macho-format.md pe-format.md

docs/assembly-engine.md

0

# Assembly Engine

1

2

Integrated disassembly and assembly engine supporting multiple architectures for code analysis and binary modification. The assembly engine provides unified interfaces for disassembling machine code and assembling instructions across different CPU architectures.

3

4

## Capabilities

5

6

### Instruction Disassembly

7

8

Disassemble machine code into human-readable assembly instructions with detailed metadata.

9

10

```python { .api }

11

# Access through lief.assembly module

12

import lief.assembly as Assembly

13

14

class Engine:

15

def disassemble(self, data: bytes, address: int = 0) -> Iterator[Instruction]

16

def assemble(self, code: str, address: int = 0) -> bytes

17

18

class Instruction:

19

address: int

20

size: int

21

mnemonic: str

22

raw: bytes

23

operands: List[Operand]

24

25

def to_string(self, with_address: bool = True) -> str

26

def is_call(self) -> bool

27

def is_branch(self) -> bool

28

def is_terminator(self) -> bool

29

30

class Operand:

31

def to_string(self) -> str

32

33

class MemoryAccess(enum.Flag):

34

NONE = 0

35

READ = 1

36

WRITE = 2

37

38

# Disassembly methods available on Binary objects

39

def disassemble(self, address: int, size: int = None) -> Iterator[Optional[Instruction]]

40

def disassemble(self, function_name: str) -> Iterator[Optional[Instruction]]

41

def disassemble_from_bytes(self, buffer: bytes, address: int = 0) -> Iterator[Optional[Instruction]]

42

def assemble(self, address: int, assembly: str) -> bytes

43

```

44

45

Usage example:

46

```python

47

import lief

binary = lief.parse("/bin/ls")

# 1) Start at the entry point and request a size of 64.
#    Falsy entries yielded by the iterator are dropped by filter(None, ...).
print(f"Disassembling at entry point: 0x{binary.entrypoint:x}")
for insn in filter(None, binary.disassemble(binary.entrypoint, 64)):
    print(f"0x{insn.address:08x}: {insn.mnemonic}")
    print(f" Raw bytes: {insn.raw.hex()}")
    print(f" Size: {insn.size}")

# 2) Disassemble a function by symbol name, if the symbol exists.
if binary.has_symbol("main"):
    print("\nDisassembling main function:")
    for insn in filter(None, binary.disassemble("main")):
        print(insn.to_string())

# 3) Disassemble a raw buffer, reported as if it were loaded at 0x1000.
machine_code = b"\x48\x89\xe5\x48\x83\xec\x10"  # x86-64 function prologue
print("\nDisassembling raw bytes:")
for insn in filter(None, binary.disassemble_from_bytes(machine_code, 0x1000)):
    print(f"0x{insn.address:x}: {insn.mnemonic}")

72

```

73

74

### Code Assembly

75

76

Assemble assembly instructions into machine code for binary patching and modification.

77

78

```python { .api }

79

def assemble(self, address: int, assembly: str) -> bytes:

80

"""

81

Assemble assembly instructions into machine code.

82

83

Args:

84

address: Target address for assembled code

85

assembly: Assembly instructions as string

86

87

Returns:

88

Machine code bytes

89

"""

90

```

91

92

Usage example:

93

```python

94

# The snippet is self-contained like the other examples: import lief first.
import lief

binary = lief.parse("/bin/test")

# Assemble single instruction
nop_bytes = binary.assemble(0x1000, "nop")
print(f"NOP instruction: {nop_bytes.hex()}")

# Assemble multiple instructions (one instruction per line)
function_prologue = binary.assemble(0x2000, """
push rbp
mov rbp, rsp
sub rsp, 16
""")
print(f"Function prologue: {function_prologue.hex()}")

# Assemble with jumps: "end" is a local label resolved by the assembler
conditional_code = binary.assemble(0x3000, """
cmp eax, 0
je end
mov ebx, 1
end:
ret
""")
print(f"Conditional code: {conditional_code.hex()}")

117

```

118

119

### Architecture Support

120

121

Support for multiple CPU architectures with architecture-specific instruction handling.

122

123

```python { .api }

124

# Architecture-specific modules

125

import lief.assembly.aarch64 as AArch64

126

import lief.assembly.x86 as x86

127

import lief.assembly.arm as ARM

128

import lief.assembly.mips as MIPS

129

import lief.assembly.riscv as RISCV

130

import lief.assembly.powerpc as PowerPC

131

import lief.assembly.ebpf as eBPF

132

133

# AArch64 Architecture

134

class AArch64:

135

class Instruction(Assembly.Instruction):

136

operands: List[Operand]

137

138

class Operand(Assembly.Operand):

139

pass

140

141

class Register(Operand):

142

reg: REGISTERS

143

144

class Immediate(Operand):

145

value: int

146

147

class Memory(Operand):

148

base: Register

149

offset: int

150

151

class PCRelative(Operand):

152

value: int

153

154

enum REGISTERS:

155

X0 = 0

156

X1 = 1

157

# ... more registers

158

SP = 31

159

XZR = 32

160

161

# x86/x86-64 Architecture

162

class x86:

163

class Instruction(Assembly.Instruction):

164

operands: List[Operand]

165

166

class Operand(Assembly.Operand):

167

pass

168

169

class Register(Operand):

170

reg: REGISTERS

171

172

class Immediate(Operand):

173

value: int

174

175

class Memory(Operand):

176

base: Optional[Register]

177

index: Optional[Register]

178

scale: int

179

displacement: int

180

181

enum REGISTERS:

182

EAX = 0

183

ECX = 1

184

EDX = 2

185

EBX = 3

186

ESP = 4

187

EBP = 5

188

ESI = 6

189

EDI = 7

190

# x86-64 extended registers

191

R8 = 8

192

R9 = 9

193

# ... more registers

194

195

class Engine:

196

"""Base disassembly engine class."""

197

def disassemble(self, data: bytes, address: int = 0) -> Iterator[Instruction]

198

def assemble(self, code: str, address: int = 0) -> bytes

199

```

200

201

#### x86/x86-64 Support

202

203

Intel x86 and AMD64 architecture support with full instruction set coverage.

204

205

```python { .api }

206

# x86-specific features available through lief.assembly.x86

207

# Supports:

208

# - 16-bit, 32-bit, and 64-bit modes

209

# - SSE/AVX vector instructions

210

# - System instructions

211

# - FPU instructions

212

# - Modern extensions (BMI, etc.)

213

```

214

215

Usage example:

216

```python

217

import lief
import lief.assembly as Assembly

binary = lief.parse("/bin/ls")  # x86-64 binary

# Disassemble with enhanced instruction analysis
for instruction in binary.disassemble(binary.entrypoint, 64):
    if not instruction:
        # Skip entries the disassembler could not decode
        continue

    print(instruction.to_string())

    # Enhanced instruction type checking
    if instruction.is_call():
        print(" -> CALL instruction")
    elif instruction.is_branch():
        print(" -> BRANCH instruction")
    elif instruction.is_terminator():
        print(" -> TERMINATOR instruction")

    # Print operands with details
    for i, operand in enumerate(instruction.operands):
        print(f" Operand {i}: {operand.to_string()}")

    # Check memory access patterns.
    # memory_access is declared as a method in this guide's API listings, so
    # it must be called before its flags can be tested with "&".
    # NOTE(review): confirm against the installed LIEF version whether this
    # is a method or a read-only property.
    if hasattr(instruction, 'memory_access'):
        access = instruction.memory_access()
        if access & Assembly.MemoryAccess.READ:
            print(" -> Reads memory")
        if access & Assembly.MemoryAccess.WRITE:
            print(" -> Writes memory")

# Use standalone assembly engine
engine = Assembly.Engine()
code_bytes = b'\x48\x89\xe5'  # mov rbp, rsp (x86-64)
for instr in engine.disassemble(code_bytes, 0x1000):
    print(f"0x{instr.address:08x}: {instr.mnemonic}")

# Assemble with standalone engine
machine_code = engine.assemble("push ebp\nmov ebp, esp", 0x1000)
print(f"Assembled: {machine_code.hex()}")

256

```

257

258

#### ARM/AArch64 Support

259

260

ARM 32-bit and 64-bit architecture support including Thumb mode.

261

262

```python { .api }

263

# ARM-specific features available through lief.assembly.arm and lief.assembly.aarch64

264

# Supports:

265

# - ARM32 (ARM mode and Thumb mode)

266

# - AArch64 (64-bit ARM)

267

# - NEON vector instructions

268

# - Cryptographic extensions

269

# - System registers

270

```

271

272

Usage example:

273

```python

274

import lief

# ARM64 binary analysis
arm_binary = lief.parse("/system/bin/app_process64")  # Android ARM64

# Exact A64 branch mnemonics. A bare startswith("b") would also match
# non-branch instructions such as "bic", "bfi" or "brk".
# Assumes lowercase mnemonics, as the original prefix checks did.
BRANCH_MNEMONICS = {"b", "bl", "blr", "br", "cbz", "cbnz", "tbz", "tbnz"}

for instruction in arm_binary.disassemble(arm_binary.entrypoint, 64):
    if not instruction:
        continue

    mnemonic = instruction.mnemonic
    print(f"0x{instruction.address:x}: {mnemonic}")

    # ARM64-specific instruction analysis (mnemonic-based heuristic)
    if mnemonic.startswith(("str", "ldr")):
        print(" -> Memory access instruction")
    elif mnemonic in BRANCH_MNEMONICS or mnemonic.startswith("b."):
        # "b.<cond>" covers conditional branches such as b.eq / b.ne
        print(" -> Branch instruction")

286

```

287

288

#### RISC-V Support

289

290

RISC-V architecture support for the emerging open-source instruction set.

291

292

```python { .api }

293

# RISC-V features available through lief.assembly.riscv

294

# Supports:

295

# - RV32I/RV64I base instruction sets

296

# - Standard extensions (M, A, F, D, C)

297

# - Privileged instructions

298

# - Custom extensions

299

```

300

301

#### MIPS Support

302

303

MIPS architecture support for embedded and networking systems.

304

305

```python { .api }

306

# MIPS features available through lief.assembly.mips

307

# Supports:

308

# - MIPS32/MIPS64

309

# - Big-endian and little-endian

310

# - Delay slots

311

# - Coprocessor instructions

312

```

313

314

#### PowerPC Support

315

316

PowerPC architecture support for legacy and embedded systems.

317

318

```python { .api }

319

# PowerPC features available through lief.assembly.powerpc

320

# Supports:

321

# - PowerPC 32-bit and 64-bit

322

# - Vector instructions (AltiVec)

323

# - System instructions

324

```

325

326

#### eBPF Support

327

328

Extended Berkeley Packet Filter support for kernel and networking analysis.

329

330

```python { .api }

331

# eBPF features available through lief.assembly.ebpf

332

# Supports:

333

# - eBPF instruction set

334

# - Kernel helper functions

335

# - Map operations

336

# - System call analysis

337

```

338

339

### Advanced Disassembly Features

340

341

Enhanced disassembly capabilities for detailed code analysis.

342

343

```python { .api }

344

class Instruction:

345

def is_call(self) -> bool:

346

"""Check if instruction is a function call."""

347

348

def is_jump(self) -> bool:

349

"""Check if instruction is a jump/branch."""

350

351

def is_conditional(self) -> bool:

352

"""Check if instruction is conditional."""

353

354

def is_terminator(self) -> bool:

355

"""Check if instruction terminates basic block."""

356

357

def memory_access(self) -> MemoryAccess:

358

"""Get memory access type (read/write/none)."""

359

360

def operands(self) -> List[Operand]:

361

"""Get instruction operands."""

362

```

363

364

Usage example:

365

```python

366

# Both names are used below; without these imports the snippet fails with
# NameError when run on its own.
import lief
from lief.assembly import MemoryAccess

binary = lief.parse("/usr/bin/gcc")

# Advanced instruction analysis
for instruction in binary.disassemble("main"):
    if not instruction:
        # Skip entries the disassembler could not decode
        continue

    print(instruction.to_string())

    # Analyze instruction properties (first matching category wins)
    if instruction.is_call():
        print(" -> Function call")
    elif instruction.is_jump():
        if instruction.is_conditional():
            print(" -> Conditional branch")
        else:
            print(" -> Unconditional jump")
    elif instruction.is_terminator():
        print(" -> Basic block terminator")

    # Check memory access: READ and WRITE are independent flags, so an
    # instruction may report both.
    access = instruction.memory_access()
    if access & MemoryAccess.READ:
        print(" -> Reads memory")
    if access & MemoryAccess.WRITE:
        print(" -> Writes memory")

390

```

391

392

### Control Flow Analysis

393

394

Analyze control flow patterns and basic block structure.

395

396

```python { .api }

397

def analyze_control_flow(binary, start_address, max_instructions=1000):
    """
    Split the code starting at ``start_address`` into basic blocks.

    Instructions are decoded linearly. A block is closed after every
    instruction whose ``is_terminator()`` is true; calls do not end a block.
    Only the list of blocks is returned -- no control flow graph is built.

    Args:
        binary: Object exposing ``disassemble(address, size)``.
        start_address: Address of the first instruction to decode.
        max_instructions: Upper bound on the number of instructions collected.

    Returns:
        List of basic blocks, each a non-empty list of instructions. The
        final block may lack a terminator when decoding stops early.
    """
    basic_blocks = []
    current_block = []
    collected = 0

    # The size passed to disassemble() keeps the original 4-units-per-
    # instruction heuristic (presumably bytes -- TODO confirm). It is only an
    # estimate, so the instruction cap is enforced explicitly as well.
    for instruction in binary.disassemble(start_address, max_instructions * 4):
        if collected >= max_instructions:
            break
        if not instruction:
            # Skip entries the disassembler could not decode
            continue

        current_block.append(instruction)
        collected += 1

        if instruction.is_terminator():
            basic_blocks.append(current_block)
            current_block = []

    # Keep the instructions decoded after the last terminator instead of
    # silently dropping them.
    if current_block:
        basic_blocks.append(current_block)

    return basic_blocks

421

```

422

423

Usage example:

424

```python

425

def analyze_function_flow(binary, function_name):
    """Print each basic block of ``function_name`` and how the block ends.

    Prints a "not found" message and returns early when the binary has no
    symbol with that name. Nothing is returned in either case.
    """
    if not binary.has_symbol(function_name):
        print(f"Function {function_name} not found")
        return

    print(f"Analyzing control flow for {function_name}:")

    entry = binary.get_function_address(function_name)
    for index, insns in enumerate(analyze_control_flow(binary, entry)):
        print(f"\nBasic Block {index}:")
        for insn in insns:
            print(f" {insn.to_string()}")

        # Classify the block by its final instruction; the first matching
        # category wins and unmatched endings print nothing.
        tail = insns[-1]
        ending = None
        if tail.is_call():
            ending = " -> Ends with function call"
        elif tail.is_jump():
            if tail.is_conditional():
                ending = " -> Ends with conditional branch"
            else:
                ending = " -> Ends with unconditional jump"
        elif "ret" in tail.mnemonic:
            ending = " -> Function return"

        if ending is not None:
            print(ending)

452

453

# Usage

454

binary = lief.parse("/bin/bash")

455

analyze_function_flow(binary, "main")

456

```

457

458

### Binary Modification with Assembly

459

460

Combine disassembly and assembly for binary modification workflows.

461

462

```python { .api }

463

def patch_function_with_assembly(binary, function_name, new_assembly):

464

"""

465

Replace function with new assembly code.

466

467

Args:

468

binary: LIEF binary object

469

function_name: Name of function to patch

470

new_assembly: New assembly code as string

471

472

Returns:

473

Success status and patch information

474

"""

475

```

476

477

Usage example:

478

```python

479

def patch_binary_function(binary_path, function_name, new_code):
    """Patch a function in a binary with new assembly code.

    The replacement is applied only when it fits in the space occupied by
    the original instructions, measured up to and including the first
    instruction whose mnemonic contains ``ret``. Leftover bytes are
    overwritten with NOPs. Nothing here saves the modified binary; callers
    must write it out themselves.

    Args:
        binary_path: Path of the binary to parse.
        function_name: Symbol name of the function to replace.
        new_code: Replacement assembly source.

    Returns:
        True if the patch was applied; False if the binary could not be
        parsed, the symbol is missing, or the new code does not fit.
    """
    binary = lief.parse(binary_path)
    if not binary:
        return False

    # Find target function
    if not binary.has_symbol(function_name):
        print(f"Function {function_name} not found")
        return False

    func_addr = binary.get_function_address(function_name)
    print(f"Found {function_name} at 0x{func_addr:x}")

    # Disassemble original function to measure the space available
    print("Original code:")
    original_size = 0
    for instruction in binary.disassemble(function_name):
        if not instruction:
            continue
        print(f" {instruction.to_string()}")
        original_size += instruction.size

        # Stop at return instruction
        if "ret" in instruction.mnemonic:
            break

    # Assemble new code
    # NOTE(review): check whether binary.assemble() already writes the bytes
    # at the given address; if it does, the size check below runs too late.
    new_machine_code = binary.assemble(func_addr, new_code)
    print(f"\nNew machine code: {new_machine_code.hex()}")
    print(f"Original size: {original_size}, New size: {len(new_machine_code)}")

    if len(new_machine_code) > original_size:
        print("New code too large for available space")
        return False

    # Apply patch
    binary.patch_address(func_addr, new_machine_code)

    # Pad with NOPs if needed
    padding = original_size - len(new_machine_code)
    if padding > 0:
        pad_addr = func_addr + len(new_machine_code)
        # One mnemonic per line: "nop" * padding would produce the single
        # invalid token "nopnop...".
        # Assumes a one-byte NOP (x86) -- TODO confirm for other architectures.
        nop_bytes = binary.assemble(pad_addr, "\n".join(["nop"] * padding))
        binary.patch_address(pad_addr, nop_bytes)

    print("Patch applied successfully")
    return True

526

527

# Usage

528

new_function_code = """

529

mov eax, 42

530

ret

531

"""

532

533

success = patch_binary_function("/tmp/test_binary", "get_value", new_function_code)

534

if success:

535

print("Binary patching completed")

536

```

537

538

## Types

539

540

```python { .api }

541

class Engine:

542

"""Base disassembly engine."""

543

pass

544

545

class Instruction:

546

address: int

547

size: int

548

mnemonic: str

549

raw: bytes

550

551

def to_string(self, with_address: bool = True) -> str

552

def is_call(self) -> bool

553

def is_jump(self) -> bool

554

def is_conditional(self) -> bool

555

def is_terminator(self) -> bool

556

def memory_access(self) -> MemoryAccess

557

558

class MemoryAccess(enum.Flag):

559

NONE = 0

560

READ = 1

561

WRITE = 2

562

563

class Operand:

564

"""Instruction operand representation."""

565

type: OperandType

566

value: Union[int, str]

567

size: int

568

569

enum OperandType:

570

REGISTER = 1

571

IMMEDIATE = 2

572

MEMORY = 3

573

DISPLACEMENT = 4

574

575

# Architecture-specific instruction extensions would be available

576

# through the respective architecture modules:

577

# - lief.assembly.x86

578

# - lief.assembly.aarch64

579

# - lief.assembly.arm

580

# - lief.assembly.mips

581

# - lief.assembly.powerpc

582

# - lief.assembly.riscv

583

# - lief.assembly.ebpf

584

```