0
# Debug Information
1
2
Parsing of DWARF and PDB debug information, enabling source-level analysis, debugging support, and program understanding. Debug information bridges the gap between compiled machine code and the original source code.
3
4
## Capabilities
5
6
### Debug Format Detection
7
8
Identify and access debug information embedded in binaries.
9
10
```python { .api }
11
class DebugInfo:
12
format: FORMAT
13
14
def has_debug_info(self) -> bool:
15
"""Check if binary contains debug information."""
16
17
enum FORMAT:
18
UNKNOWN = 0
19
DWARF = 1
20
PDB = 2
21
22
# Access debug info through Binary.debug_info property
23
binary.debug_info: DebugInfo
24
```
25
26
Usage example:
27
```python
28
import lief
29
30
binary = lief.parse("/usr/bin/gcc")

# Check for debug information.
# NOTE(review): lief.parse() is expected to return None when the file cannot
# be parsed (see the LIEF Python API docs), so the result is checked before
# debug_info is read; a failed parse falls through to the "not found" branch.
if binary is not None and binary.debug_info:
    debug_format = binary.debug_info.format
    print(f"Debug format: {debug_format}")

    if debug_format == lief.DebugInfo.FORMAT.DWARF:
        print("DWARF debug information detected")
    elif debug_format == lief.DebugInfo.FORMAT.PDB:
        print("PDB debug information detected")
else:
    print("No debug information found")
42
```
43
44
### DWARF Debug Information
45
46
Comprehensive DWARF (Debugging With Attributed Record Formats) support for Unix-like systems.
47
48
```python { .api }
49
# DWARF parsing available through lief.dwarf module
50
import lief.dwarf as DWARF
51
52
class CompilationUnit:
53
"""DWARF compilation unit containing debug information for a source file."""
54
language: DW_LANG
55
name: str
56
producer: str
57
low_address: int
58
high_address: int
59
60
def find_function(self, name: str) -> Optional[Function]
61
def find_variable(self, name: str) -> Optional[Variable]
62
def functions(self) -> Iterator[Function]
63
def variables(self) -> Iterator[Variable]
64
def types(self) -> Iterator[Type]
65
66
class Function:
67
"""DWARF function debug information."""
68
name: str
69
linkage_name: str
70
address: range_t
71
file: str
72
line: int
73
type: Optional[Type]
74
parameters: Iterator[Parameter]
75
variables: Iterator[Variable]
76
77
def scope(self) -> Scope
78
79
class Variable:
80
"""DWARF variable debug information."""
81
name: str
82
linkage_name: str
83
address: Optional[int]
84
file: str
85
line: int
86
type: Optional[Type]
87
scope: Scope
88
89
class Type:
90
"""DWARF type information."""
91
name: str
92
size: int
93
kind: TYPE_KIND
94
95
enum TYPE_KIND:
96
UNKNOWN = 0
97
UNSPECIFIED = 1
98
ADDRESS = 2
99
BOOLEAN = 3
100
COMPLEX_FLOAT = 4
101
FLOAT = 5
102
SIGNED = 6
103
SIGNED_CHAR = 7
104
UNSIGNED = 8
105
UNSIGNED_CHAR = 9
106
IMAGINARY_FLOAT = 10
107
PACKED_DECIMAL = 11
108
NUMERIC_STRING = 12
109
EDITED = 13
110
SIGNED_FIXED = 14
111
UNSIGNED_FIXED = 15
112
DECIMAL_FLOAT = 16
113
UTF = 17
114
115
enum DW_LANG:
116
C89 = 1
117
C = 2
118
Ada83 = 3
119
C_plus_plus = 4
120
Cobol74 = 5
121
Cobol85 = 6
122
Fortran77 = 7
123
Fortran90 = 8
124
Pascal83 = 9
125
Modula2 = 10
126
Java = 11
127
C99 = 12
128
Ada95 = 13
129
Fortran95 = 14
130
PLI = 15
131
ObjC = 16
132
ObjC_plus_plus = 17
133
UPC = 18
134
D = 19
135
Python = 20
136
Rust = 21
137
C11 = 22
138
Swift = 23
139
Julia = 24
140
Dylan = 25
141
C_plus_plus_14 = 26
142
Fortran03 = 27
143
Fortran08 = 28
144
RenderScript = 29
145
```
146
147
Usage example:
148
```python
149
import lief
150
import lief.dwarf as DWARF
151
152
# Parse binary with DWARF debug info
binary = lief.parse("/usr/bin/debug_program")

# NOTE(review): lief.parse() is expected to return None when the file cannot
# be parsed, so the result is checked before debug_info is read.
if (
    binary is not None
    and binary.debug_info
    and binary.debug_info.format == lief.DebugInfo.FORMAT.DWARF
):
    print("Analyzing DWARF debug information...")

    # Access DWARF-specific functionality
    dwarf_info = DWARF.load(binary)

    # Iterate through compilation units
    for cu in dwarf_info.compilation_units():
        print(f"\nCompilation Unit: {cu.name}")
        print(f"Language: {cu.language}")
        print(f"Producer: {cu.producer}")
        print(f"Address range: 0x{cu.low_address:x} - 0x{cu.high_address:x}")

        # List functions in this compilation unit
        print("Functions:")
        for function in cu.functions():
            print(f" {function.name} @ 0x{function.address.low:x}")
            print(f" File: {function.file}:{function.line}")

            # List function parameters.
            # `parameters` is listed as an iterator in the API section of this
            # document; an iterator object is normally truthy even when it
            # yields nothing, so materialise it before testing for emptiness.
            parameters = list(function.parameters)
            if parameters:
                print(" Parameters:")
                for param in parameters:
                    type_name = param.type.name if param.type else "unknown"
                    print(f" {param.name}: {type_name}")

        # List global variables
        print("Variables:")
        for variable in cu.variables():
            print(f" {variable.name}")
            # `address` is listed as Optional[int]; compare against None so a
            # variable located at address 0 is still reported.
            if variable.address is not None:
                print(f" Address: 0x{variable.address:x}")
            print(f" Location: {variable.file}:{variable.line}")
188
```
189
190
### PDB Debug Information
191
192
Microsoft PDB (Program Database) format support for Windows executables.
193
194
```python { .api }
195
# PDB parsing available through lief.pdb module
196
import lief.pdb as PDB
197
198
class PublicSymbol:
199
"""PDB public symbol information."""
200
name: str
201
section_id: int
202
RVA: int
203
204
class CompilationUnit:
205
"""PDB compilation unit (module)."""
206
module_name: str
207
object_filename: str
208
209
def sources(self) -> Iterator[str]
210
def functions(self) -> Iterator[Function]
211
212
class Function:
213
"""PDB function debug information."""
214
name: str
215
RVA: int
216
size: int
217
section_id: int
218
219
def debug_location(self) -> debug_location_t
220
221
class Type:
222
"""PDB type information."""
223
pass
224
225
# Main PDB interface
226
class PDB:
227
age: int
228
guid: str
229
230
def compilation_units(self) -> Iterator[CompilationUnit]
231
def public_symbols(self) -> Iterator[PublicSymbol]
232
def functions(self) -> Iterator[Function]
233
def types(self) -> Iterator[Type]
234
```
235
236
Usage example:
237
```python
238
import lief
239
import lief.pdb as PDB
240
241
# Parse Windows binary with PDB debug info
binary = lief.PE.parse("C:\\Program Files\\App\\app.exe")

# NOTE(review): the LIEF parse functions are expected to return None when the
# file cannot be parsed, so the result is checked before debug_info is read.
if (
    binary is not None
    and binary.debug_info
    and binary.debug_info.format == lief.DebugInfo.FORMAT.PDB
):
    print("Analyzing PDB debug information...")

    # Access PDB-specific functionality
    pdb_info = PDB.load(binary)

    print(f"PDB GUID: {pdb_info.guid}")
    print(f"PDB Age: {pdb_info.age}")

    # List public symbols
    print("\nPublic symbols:")
    for symbol in pdb_info.public_symbols():
        print(f" {symbol.name} @ RVA 0x{symbol.RVA:x}")

    # List compilation units (modules)
    print("\nCompilation units:")
    for cu in pdb_info.compilation_units():
        print(f" Module: {cu.module_name}")
        print(f" Object: {cu.object_filename}")

        # List source files (materialised so emptiness can be tested)
        sources = list(cu.sources())
        if sources:
            print(" Sources:")
            for source in sources:
                print(f" {source}")

        # List functions in module; the full list is needed for the
        # "... and N more" count, only the first 5 are printed.
        functions = list(cu.functions())
        if functions:
            print(" Functions:")
            for func in functions[:5]:  # Show first 5
                print(f" {func.name} @ RVA 0x{func.RVA:x}")
            if len(functions) > 5:
                print(f" ... and {len(functions) - 5} more")
279
```
280
281
### Source Code Mapping
282
283
Map machine code addresses back to source code locations using debug information.
284
285
```python { .api }
286
class debug_location_t:
287
line: int
288
file: str
289
290
def addr_to_line(binary, address: int) -> Optional[debug_location_t]:
291
"""
292
Map machine code address to source location.
293
294
Args:
295
binary: Binary with debug information
296
address: Machine code address
297
298
Returns:
299
Source location or None if not found
300
"""
301
```
302
303
Usage example:
304
```python
305
def _find_containing_function(dwarf_info, address):
    """Return the first DWARF function whose address range contains *address*, else None."""
    for cu in dwarf_info.compilation_units():
        for function in cu.functions():
            # NOTE(review): the upper bound is treated as inclusive, as in the
            # original example; confirm whether range_t.high is an exclusive end.
            if function.address.low <= address <= function.address.high:
                return function
    return None


def analyze_crash_address(binary_path, crash_address):
    """Analyze crash address using debug information.

    Prints the source file and line for ``crash_address`` and, for DWARF
    binaries, the name of the first function whose address range contains it.

    Relies on ``lief``, ``DWARF`` (``lief.dwarf``) and ``addr_to_line`` being
    available in the enclosing scope.

    Args:
        binary_path: Path of the binary to parse with ``lief.parse``.
        crash_address: Machine code address to look up.

    Returns:
        None; all results are printed.
    """
    binary = lief.parse(binary_path)
    # NOTE(review): lief.parse() is expected to return None when the file
    # cannot be parsed; treat that the same as "no debug information".
    if binary is None or not binary.debug_info:
        print("No debug information available")
        return

    # Map address to source location
    location = addr_to_line(binary, crash_address)
    if not location:
        print(f"No source location found for address 0x{crash_address:x}")
        return

    print(f"Crash at 0x{crash_address:x}:")
    print(f" File: {location.file}")
    print(f" Line: {location.line}")

    # Find containing function. The search stops at the first match across
    # all compilation units; a bare `break` in the nested loops would only
    # leave the inner loop and keep scanning the remaining units.
    if binary.debug_info.format == lief.DebugInfo.FORMAT.DWARF:
        function = _find_containing_function(DWARF.load(binary), crash_address)
        if function is not None:
            print(f" Function: {function.name}")
330
331
# Usage
332
analyze_crash_address("/usr/bin/crashed_program", 0x401234)
333
```
334
335
### Variable and Type Analysis
336
337
Analyze program variables and data types using debug information.
338
339
```python { .api }
340
def analyze_data_structures(binary):
    """Analyze data structures and types from debug info.

    Prints every struct type (name and size) and every variable that has an
    address, using the DWARF information of ``binary``. Does nothing when the
    binary has no debug information or its format is not DWARF.

    Relies on ``lief`` and ``DWARF`` (``lief.dwarf``) being available in the
    enclosing scope.

    Args:
        binary: Parsed binary exposing a ``debug_info`` attribute.

    Returns:
        None; all results are printed.
    """
    if not binary.debug_info:
        return

    if binary.debug_info.format != lief.DebugInfo.FORMAT.DWARF:
        return

    dwarf_info = DWARF.load(binary)

    # Collect all types, keyed by name. Types sharing a name (including
    # several unnamed ones) collapse into a single entry, last one wins.
    all_types = {}
    for cu in dwarf_info.compilation_units():
        for type_info in cu.types():
            all_types[type_info.name] = type_info

    # Analyze structure types
    # NOTE(review): the TYPE_KIND listing in this document does not declare a
    # STRUCT member; confirm the enumerator name against the LIEF DWARF API.
    print("Data structures:")
    for name, type_info in all_types.items():
        if type_info.kind == DWARF.TYPE_KIND.STRUCT:
            print(f" struct {name} (size: {type_info.size})")

    # Analyze global variables
    print("\nGlobal variables:")
    for cu in dwarf_info.compilation_units():
        for var in cu.variables():
            # `address` is listed as Optional[int]; compare against None so a
            # variable located at address 0 is not silently skipped.
            if var.address is not None:
                type_name = var.type.name if var.type else "unknown"
                print(f" {var.name}: {type_name} @ 0x{var.address:x}")
368
```
369
370
### Call Stack Analysis
371
372
Analyze call stacks and function relationships using debug information.
373
374
```python { .api }
375
def _find_containing_function(dwarf_info, address):
    """Return the first DWARF function whose address range contains *address*, else None."""
    for cu in dwarf_info.compilation_units():
        for func in cu.functions():
            # NOTE(review): the upper bound is treated as inclusive, as in the
            # original example; confirm whether range_t.high is an exclusive end.
            if func.address.low <= address <= func.address.high:
                return func
    return None


def analyze_call_stack(binary, addresses):
    """
    Analyze call stack using debug information.

    Prints one line per address with its source location (or a
    "no debug info" marker) and, for DWARF binaries, the name of the first
    function whose address range contains the address.

    Relies on ``lief``, ``DWARF`` (``lief.dwarf``) and ``addr_to_line`` being
    available in the enclosing scope.

    Args:
        binary: Binary with debug information
        addresses: List of return addresses from stack trace

    Returns:
        None; all results are printed.
    """
    print("Call stack analysis:")

    # Loaded on first use and then reused for every frame: the load does not
    # depend on the address, so repeating it per frame is wasted work.
    # NOTE(review): assumes DWARF.load(binary) yields equivalent data on each call.
    dwarf_info = None

    for i, addr in enumerate(addresses):
        location = addr_to_line(binary, addr)
        if not location:
            print(f" #{i}: 0x{addr:x} (no debug info)")
            continue

        print(f" #{i}: 0x{addr:x} in {location.file}:{location.line}")

        # Find function name (DWARF only)
        if binary.debug_info.format != lief.DebugInfo.FORMAT.DWARF:
            continue
        if dwarf_info is None:
            dwarf_info = DWARF.load(binary)

        # Stop at the first match across all compilation units; a bare
        # `break` in the nested loops would only leave the inner loop.
        func = _find_containing_function(dwarf_info, addr)
        if func is not None:
            print(f" Function: {func.name}")
400
```
401
402
### Debug Information Extraction
403
404
Extract and export debug information for external analysis tools.
405
406
```python { .api }
407
def extract_debug_info(binary_path, output_format="json"):
    """
    Extract debug information from binary.

    Only DWARF data is collected; for any other debug format the
    "compilation_units", "functions", "variables" and "types" lists stay
    empty and only "format" is filled in.

    Relies on ``lief`` and ``DWARF`` (``lief.dwarf``) being available in the
    enclosing scope.

    Args:
        binary_path: Path to binary file
        output_format: "json" returns a JSON string; any other value returns
            the collected data as a plain dict ("xml" and "text"
            serialisation are not implemented)

    Returns:
        JSON string or dict as described above, or None when the binary
        cannot be parsed or has no debug information
    """
    binary = lief.parse(binary_path)
    # NOTE(review): lief.parse() is expected to return None when the file
    # cannot be parsed; report that the same way as missing debug info.
    if binary is None or not binary.debug_info:
        return None

    debug_data = {
        "format": str(binary.debug_info.format),
        "compilation_units": [],
        "functions": [],
        "variables": [],
        "types": [],
    }

    if binary.debug_info.format == lief.DebugInfo.FORMAT.DWARF:
        dwarf_info = DWARF.load(binary)

        for cu in dwarf_info.compilation_units():
            debug_data["compilation_units"].append(
                {
                    "name": cu.name,
                    "language": str(cu.language),
                    "producer": cu.producer,
                    "address_range": [cu.low_address, cu.high_address],
                }
            )

            # Extract functions
            debug_data["functions"].extend(
                {
                    "name": func.name,
                    "address": [func.address.low, func.address.high],
                    "file": func.file,
                    "line": func.line,
                }
                for func in cu.functions()
            )

            # Extract variables and types so the "variables" and "types"
            # keys declared above are actually populated. The attributes
            # used are the ones listed for Variable and Type in the API
            # section of this document.
            debug_data["variables"].extend(
                {
                    "name": var.name,
                    "address": var.address,
                    "file": var.file,
                    "line": var.line,
                }
                for var in cu.variables()
            )
            debug_data["types"].extend(
                {
                    "name": type_info.name,
                    "size": type_info.size,
                    "kind": str(type_info.kind),
                }
                for type_info in cu.types()
            )

    if output_format == "json":
        import json

        return json.dumps(debug_data, indent=2)
    return debug_data
458
459
# Usage
460
debug_json = extract_debug_info("/usr/bin/program", "json")
461
if debug_json:
462
with open("debug_info.json", "w") as f:
463
f.write(debug_json)
464
```
465
466
## Types
467
468
```python { .api }
469
class DebugInfo:
470
format: FORMAT
471
472
enum FORMAT:
473
UNKNOWN = 0
474
DWARF = 1
475
PDB = 2
476
477
class debug_location_t:
478
line: int
479
file: str
480
481
# DWARF-specific types
482
class range_t:
483
low: int
484
high: int
485
size: int
486
487
enum Scope:
488
GLOBAL = 0
489
LOCAL = 1
490
PARAMETER = 2
491
492
class Parameter:
493
name: str
494
type: Optional[Type]
495
496
# PDB-specific types
497
class GUID:
498
data1: int
499
data2: int
500
data3: int
501
data4: bytes
502
503
# Common debug information interfaces
504
class SourceFile:
505
path: str
506
directory: str
507
508
class LineEntry:
509
address: int
510
file: SourceFile
511
line: int
512
column: int
513
514
class InlineInfo:
515
call_file: SourceFile
516
call_line: int
517
call_column: int
518
callee: Function
519
```