# Import and Export Analysis

Functionality for analyzing import and export tables, including generation of import/export hashes for malware analysis. These features enable detailed examination of PE file dependencies and exported functionality.

## Capabilities

### Import Analysis

Examine imported functions and DLLs used by the PE file.

```python { .api }
def parse_import_directory(self, rva, size, dllnames_only=False):
    """
    Parse import directory at specified RVA.

    Args:
        rva (int): RVA of import directory
        size (int): Size of import directory
        dllnames_only (bool): If True, only parse DLL names for performance

    Populates:
        self.DIRECTORY_ENTRY_IMPORT: List of ImportDescData objects
    """

def get_import_table(self, rva, max_length=None, contains_addresses=False):
    """
    Get import table data.

    Args:
        rva (int): RVA of import table
        max_length (int, optional): Maximum length to parse
        contains_addresses (bool): Whether table contains addresses instead of RVAs

    Returns:
        list: Import table entries
    """
```

### Export Analysis

Examine functions and data exported by the PE file.

```python { .api }
def parse_export_directory(self, rva, size, forwarded_only=False):
    """
    Parse export directory at specified RVA.

    Args:
        rva (int): RVA of export directory
        size (int): Size of export directory
        forwarded_only (bool): If True, only parse forwarded exports

    Populates:
        self.DIRECTORY_ENTRY_EXPORT: ExportDirData object
    """
```

### Hash Generation

Generate hashes for import and export tables used in malware analysis.

```python { .api }
def get_imphash(self):
    """
    Generate import hash (imphash) for the PE file.

    The imphash is calculated from the imported DLLs and function names,
    providing a way to identify similar malware families that use the
    same import patterns.

    Returns:
        str: Hex digest of the MD5 hash of the normalized import table,
            or an empty string if the file has no imports
    """

def get_exphash(self):
    """
    Generate export hash (exphash) for the PE file.

    The exphash is calculated from the lowercased exported symbol names,
    in export-table order (exports without a name are skipped), which is
    useful for identifying PE files with similar export profiles.

    Returns:
        str: Hex digest of the SHA-256 hash of the normalized export names,
            or an empty string if the file has no named exports
    """
```

### Delay Import Analysis

Handle delay-loaded imports that are resolved at runtime.

```python { .api }
def parse_delay_import_directory(self, rva, size):
    """
    Parse delay import directory.

    Args:
        rva (int): RVA of delay import directory
        size (int): Size of directory

    Populates:
        self.DIRECTORY_ENTRY_DELAY_IMPORT: Delay import directory data
    """
```

### Bound Import Analysis

Analyze bound imports that have pre-resolved addresses for faster loading.

```python { .api }
def parse_directory_bound_imports(self, rva, size):
    """
    Parse bound imports directory.

    Args:
        rva (int): RVA of bound imports directory
        size (int): Size of directory

    Populates:
        self.DIRECTORY_ENTRY_BOUND_IMPORT: List of BoundImportDescData objects
    """
```

## Usage Examples

### Analyzing Imports

```python
import pefile

with pefile.PE('executable.exe') as pe:
    # Check if imports are present
    if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
        print("Import Analysis:")
        print("-" * 40)

        for entry in pe.DIRECTORY_ENTRY_IMPORT:
            dll_name = entry.dll.decode('utf-8')
            print(f"\nDLL: {dll_name}")

            # List imported functions
            for imp in entry.imports:
                if imp.import_by_ordinal:
                    print(f" Ordinal: {imp.ordinal}")
                elif imp.name:
                    func_name = imp.name.decode('utf-8')
                    print(f" Function: {func_name} (Hint: {imp.hint})")

                # Printed for every import, whether by name or by ordinal
                print(f" Address: {hex(imp.address)}")
    else:
        print("No imports found")
```

### Analyzing Exports

```python
import pefile

with pefile.PE('library.dll') as pe:
    # Check if exports are present
    if hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
        export_dir = pe.DIRECTORY_ENTRY_EXPORT

        print("Export Analysis:")
        print("-" * 40)

        # Export directory information
        if hasattr(export_dir, 'struct'):
            # Named export_struct so it is not mistaken for the stdlib `struct` module
            export_struct = export_dir.struct
            print(f"DLL Name: {pe.get_string_at_rva(export_struct.Name).decode('utf-8')}")
            print(f"Base Ordinal: {export_struct.Base}")
            print(f"Number of Functions: {export_struct.NumberOfFunctions}")
            print(f"Number of Names: {export_struct.NumberOfNames}")

        # List exported symbols
        print("\nExported Functions:")
        for exp in export_dir.symbols:
            if exp.name:
                func_name = exp.name.decode('utf-8')
                print(f" {func_name} @ {exp.ordinal} (RVA: {hex(exp.address)})")
            else:
                # Export without a name: identify it by ordinal only
                print(f" Ordinal {exp.ordinal} (RVA: {hex(exp.address)})")

            # Check for forwarded exports
            if exp.forwarder:
                forwarder = exp.forwarder.decode('utf-8')
                print(f" -> Forwarded to: {forwarder}")
    else:
        print("No exports found")
```

### Import/Export Hashing

```python
import pefile

# Compare files using import hashes
files = ['malware1.exe', 'malware2.exe', 'malware3.exe']
imphashes = {}

for filename in files:
    with pefile.PE(filename) as pe:
        imphash = pe.get_imphash()
        # A falsy imphash means there was nothing to hash; skip that file
        if imphash:
            imphashes[filename] = imphash
            print(f"{filename}: {imphash}")

# Group files with same import hash
import_groups = {}
for filename, imphash in imphashes.items():
    import_groups.setdefault(imphash, []).append(filename)

# Display groups (only hashes shared by more than one file)
for imphash, group in import_groups.items():
    if len(group) > 1:
        print(f"\nFiles with same import hash {imphash}:")
        for filename in group:
            print(f" {filename}")
```

### Export Hash Analysis

```python
import pefile

# Analyze DLL export patterns
dll_files = ['system32/kernel32.dll', 'system32/ntdll.dll', 'system32/user32.dll']

for dll_file in dll_files:
    # One unreadable or malformed file should not abort the whole run,
    # so failures are reported per file and the loop continues.
    try:
        with pefile.PE(dll_file) as pe:
            exphash = pe.get_exphash()
            if exphash:
                print(f"{dll_file}: {exphash}")
            else:
                print(f"{dll_file}: No exports")
    except Exception as e:
        print(f"Error analyzing {dll_file}: {e}")
```

### Bound Import Analysis

```python
import pefile

with pefile.PE('executable.exe') as pe:
    # Check for bound imports
    if hasattr(pe, 'DIRECTORY_ENTRY_BOUND_IMPORT'):
        print("Bound Import Analysis:")
        print("-" * 40)

        for bound_import in pe.DIRECTORY_ENTRY_BOUND_IMPORT:
            # The module name is exposed as `name` (bytes) on the descriptor
            # object; the raw struct only holds OffsetModuleName, so
            # `struct.ModuleName` would raise AttributeError.
            dll_name = bound_import.name.decode('utf-8', errors='replace')
            timestamp = bound_import.struct.TimeDateStamp

            print(f"DLL: {dll_name}")
            print(f"Timestamp: {timestamp}")

            # Check for bound references
            if hasattr(bound_import, 'entries'):
                for ref in bound_import.entries:
                    # Forwarder refs follow the same layout: name on the
                    # container, timestamp on the raw struct.
                    ref_name = ref.name.decode('utf-8', errors='replace')
                    ref_timestamp = ref.struct.TimeDateStamp
                    print(f" Reference: {ref_name} ({ref_timestamp})")
    else:
        print("No bound imports found")
```

### Delay Import Analysis

```python
import pefile

with pefile.PE('executable.exe') as pe:
    # Check for delay imports
    if hasattr(pe, 'DIRECTORY_ENTRY_DELAY_IMPORT'):
        print("Delay Import Analysis:")
        print("-" * 40)

        for delay_import in pe.DIRECTORY_ENTRY_DELAY_IMPORT:
            dll_name = delay_import.dll
            # Entries without a DLL name are skipped entirely
            if not dll_name:
                continue

            print(f"\nDelay-loaded DLL: {dll_name.decode('utf-8')}")

            # List delay-loaded functions
            for imp in delay_import.imports:
                if imp.name:
                    func_name = imp.name.decode('utf-8')
                    print(f" Function: {func_name}")
                else:
                    # No name available: identify the import by ordinal
                    print(f" Ordinal: {imp.ordinal}")
    else:
        print("No delay imports found")
```