Python PE parsing module for analyzing Portable Executable (PE) files with comprehensive header, section, and directory entry support
—
Functionality for analyzing import and export tables, including generation of import/export hashes for malware analysis. These features enable detailed examination of PE file dependencies and exported functionality.
Examine imported functions and DLLs used by the PE file.
def parse_import_directory(self, rva, size, dllnames_only=False):
"""
Parse import directory at specified RVA.
Args:
rva (int): RVA of import directory
size (int): Size of import directory
dllnames_only (bool): If True, only parse DLL names for performance
Populates:
self.DIRECTORY_ENTRY_IMPORT: List of ImportDescData objects
"""
def get_import_table(self, rva, max_length=None, contains_addresses=False):
"""
Get import table data.
Args:
rva (int): RVA of import table
max_length (int, optional): Maximum length to parse
contains_addresses (bool): Whether table contains addresses instead of RVAs
Returns:
list: Import table entries
"""

Examine functions and data exported by the PE file.
def parse_export_directory(self, rva, size, forwarded_only=False):
"""
Parse export directory at specified RVA.
Args:
rva (int): RVA of export directory
size (int): Size of export directory
forwarded_only (bool): If True, only parse forwarded exports
Populates:
self.DIRECTORY_ENTRY_EXPORT: ExportDirData object
"""

Generate hashes for import and export tables used in malware analysis.
def get_imphash(self):
"""
Generate import hash (imphash) for the PE file.
The imphash is calculated from the imported DLLs and function names,
providing a way to identify similar malware families that use the
same import patterns.
Returns:
str: MD5 hash of normalized import table, or an empty string if there are no imports
"""
def get_exphash(self):
"""
Generate export hash (exphash) for the PE file.
The exphash is calculated from exported function names and ordinals,
useful for identifying PE files with similar export profiles.
Returns:
str: SHA-256 hash of the normalized export names, or an empty string if there are no exports
"""

Handle delay-loaded imports that are resolved at runtime.
def parse_delay_import_directory(self, rva, size):
"""
Parse delay import directory.
Args:
rva (int): RVA of delay import directory
size (int): Size of directory
Populates:
self.DIRECTORY_ENTRY_DELAY_IMPORT: Delay import directory data
"""

Analyze bound imports that have pre-resolved addresses for faster loading.
def parse_directory_bound_imports(self, rva, size):
"""
Parse bound imports directory.
Args:
rva (int): RVA of bound imports directory
size (int): Size of directory
Populates:
self.DIRECTORY_ENTRY_BOUND_IMPORT: List of BoundImportDescData objects
"""

import pefile
with pefile.PE('executable.exe') as pe:
    # Handle the "no import directory" case first so the main path stays flat.
    if not hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
        print("No imports found")
    else:
        print("Import Analysis:")
        print("-" * 40)

        for descriptor in pe.DIRECTORY_ENTRY_IMPORT:
            dll_name = descriptor.dll.decode('utf-8')
            print(f"\nDLL: {dll_name}")

            # Walk every symbol imported from this DLL.
            for imp in descriptor.imports:
                if imp.import_by_ordinal:
                    print(f" Ordinal: {imp.ordinal}")
                elif imp.name:
                    func_name = imp.name.decode('utf-8')
                    print(f" Function: {func_name} (Hint: {imp.hint})")
                # The address is reported for every import, named or ordinal.
                print(f" Address: {hex(imp.address)}")

import pefile
with pefile.PE('library.dll') as pe:
    # Handle the "no export directory" case first.
    if not hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
        print("No exports found")
    else:
        exports = pe.DIRECTORY_ENTRY_EXPORT
        print("Export Analysis:")
        print("-" * 40)

        # Summarise the export directory header when one is attached.
        if hasattr(exports, 'struct'):
            struct = exports.struct
            print(f"DLL Name: {pe.get_string_at_rva(struct.Name).decode('utf-8')}")
            print(f"Base Ordinal: {struct.Base}")
            print(f"Number of Functions: {struct.NumberOfFunctions}")
            print(f"Number of Names: {struct.NumberOfNames}")

        # One line per exported symbol; unnamed symbols are shown by ordinal.
        print("\nExported Functions:")
        for exp in exports.symbols:
            if not exp.name:
                print(f" Ordinal {exp.ordinal} (RVA: {hex(exp.address)})")
            else:
                func_name = exp.name.decode('utf-8')
                print(f" {func_name} @ {exp.ordinal} (RVA: {hex(exp.address)})")

            # A forwarder means the export is redirected to another module.
            if exp.forwarder:
                forwarder = exp.forwarder.decode('utf-8')
                print(f" -> Forwarded to: {forwarder}")

import pefile
# Compare files using import hashes
files = ['malware1.exe', 'malware2.exe', 'malware3.exe']

# Map each file name to its import hash, skipping files that yield a falsy
# hash (i.e. no usable import table).
imphashes = {}
for filename in files:
    with pefile.PE(filename) as pe:
        imphash = pe.get_imphash()
        if imphash:
            imphashes[filename] = imphash
            print(f"{filename}: {imphash}")

# Group files with same import hash
import_groups = {}
for filename, imphash in imphashes.items():
    import_groups.setdefault(imphash, []).append(filename)

# Display groups. The loop variable is named `group` so that it does not
# rebind (and thereby clobber) the `files` input list defined above.
for imphash, group in import_groups.items():
    if len(group) > 1:
        print(f"\nFiles with same import hash {imphash}:")
        for filename in group:
            print(f" {filename}")

import pefile
# Analyze DLL export patterns
targets = ['system32/kernel32.dll', 'system32/ntdll.dll', 'system32/user32.dll']

for dll_file in targets:
    # Best-effort scan: a failure on one file is reported and the loop moves on.
    try:
        with pefile.PE(dll_file) as pe:
            exphash = pe.get_exphash()
            if not exphash:
                print(f"{dll_file}: No exports")
            else:
                print(f"{dll_file}: {exphash}")
    except Exception as e:
        print(f"Error analyzing {dll_file}: {e}")

import pefile
with pefile.PE('executable.exe') as pe:
    # Check for bound imports
    if hasattr(pe, 'DIRECTORY_ENTRY_BOUND_IMPORT'):
        print("Bound Import Analysis:")
        print("-" * 40)
        for bound_import in pe.DIRECTORY_ENTRY_BOUND_IMPORT:
            # The raw descriptor struct carries only an OffsetModuleName
            # field; the resolved module name is exposed as `name` (bytes)
            # on the wrapper object, so read it from there and decode it.
            dll_name = bound_import.name.decode('utf-8')
            timestamp = bound_import.struct.TimeDateStamp
            print(f"DLL: {dll_name}")
            print(f"Timestamp: {timestamp}")
            # Check for bound references
            if hasattr(bound_import, 'entries'):
                for ref in bound_import.entries:
                    # Forwarder references follow the same convention:
                    # name on the wrapper, timestamp on the struct.
                    ref_name = ref.name.decode('utf-8')
                    ref_timestamp = ref.struct.TimeDateStamp
                    print(f" Reference: {ref_name} ({ref_timestamp})")
    else:
        print("No bound imports found")

import pefile
with pefile.PE('executable.exe') as pe:
# Check for delay imports
if hasattr(pe, 'DIRECTORY_ENTRY_DELAY_IMPORT'):
print("Delay Import Analysis:")
print("-" * 40)
for delay_import in pe.DIRECTORY_ENTRY_DELAY_IMPORT:
dll_name = delay_import.dll
if dll_name:
print(f"\nDelay-loaded DLL: {dll_name.decode('utf-8')}")
# List delay-loaded functions
for imp in delay_import.imports:
if imp.name:
func_name = imp.name.decode('utf-8')
print(f" Function: {func_name}")
else:
print(f" Ordinal: {imp.ordinal}")
else:
print("No delay imports found")Install with Tessl CLI
npx tessl i tessl/pypi-pefile