Python PE parsing module for analyzing Portable Executable (PE) files with comprehensive header, section, and directory entry support
—
Functions for memory mapping PE files and handling base relocations for different load addresses. These capabilities enable proper loading and execution of PE files at arbitrary memory locations.
Create memory-mapped representation of the PE file as it would appear when loaded.
def get_memory_mapped_image(self, max_virtual_address=268435456, ImageBase=None):
"""
Get memory layout of PE file as it would appear when loaded.
Args:
max_virtual_address (int): Maximum virtual address to map (default: 256MB)
ImageBase (int, optional): Base address to use. If None, uses PE's ImageBase.
Returns:
bytes: Memory-mapped image data
Note:
This creates a complete memory layout with sections mapped to their
virtual addresses, filling gaps with zeros as needed.
"""

Handle relocations needed when PE is loaded at a different base address.
def relocate_image(self, new_ImageBase):
"""
Apply base relocations for new ImageBase address.
Args:
new_ImageBase (int): New base address for the image
Note:
Modifies the PE file data to update all absolute addresses
based on the new base address. Requires relocation table.
"""
def has_relocs(self):
"""
Check if PE file has relocation table.
Returns:
bool: True if file has base relocations
"""
def has_dynamic_relocs(self):
"""
Check if PE file has dynamic relocations.
Returns:
bool: True if file has dynamic relocations
"""

Handle data appended beyond the standard PE structure.
def get_overlay(self):
"""
Get overlay data appended to PE file.
Returns:
bytes: Overlay data, or None if no overlay present
Note:
Overlay is data appended beyond the PE file structure,
often used by installers or self-extracting archives.
"""
def get_overlay_data_start_offset(self):
"""
Get file offset where overlay data begins.
Returns:
int: File offset of overlay start, or None if no overlay
"""
def trim(self):
"""
Remove overlay data from PE file.
Note:
Removes any data beyond the PE file structure,
effectively stripping overlay data.
"""

Parse and analyze relocation directory entries.
def parse_relocations_directory(self, rva, size):
"""
Parse base relocations directory.
Args:
rva (int): RVA of relocations directory
size (int): Size of relocations directory
Populates:
self.DIRECTORY_ENTRY_BASERELOC: List of BaseRelocationData objects
"""

import pefile
# Example: dump the memory-mapped image of a PE file, first at its own
# ImageBase and then as it would look when loaded at a different base.
with pefile.PE('executable.exe') as pe:
    # Get memory-mapped image at original base address
    memory_image = pe.get_memory_mapped_image()
    print(f"Memory image size: {len(memory_image)} bytes")
    print(f"Original ImageBase: 0x{pe.OPTIONAL_HEADER.ImageBase:08x}")

    # Save memory image
    with open('memory_image.bin', 'wb') as f:
        f.write(memory_image)

    # Get memory image at different base address
    new_base = 0x10000000
    relocated_image = pe.get_memory_mapped_image(ImageBase=new_base)
    with open('relocated_image.bin', 'wb') as f:
        f.write(relocated_image)

import pefile
# Example: list the base relocation blocks of a PE file and report
# whether it also carries dynamic relocations.
with pefile.PE('executable.exe') as pe:
    # Check if file has relocations
    if pe.has_relocs():
        print("File has base relocations")

        if hasattr(pe, 'DIRECTORY_ENTRY_BASERELOC'):
            total_relocs = 0
            print("\nRelocation Blocks:")
            print("-" * 40)

            for reloc_block in pe.DIRECTORY_ENTRY_BASERELOC:
                base_rva = reloc_block.struct.VirtualAddress
                block_size = reloc_block.struct.SizeOfBlock
                num_entries = len(reloc_block.entries)
                print(f"Base RVA: 0x{base_rva:08x}")
                print(f"Block Size: {block_size}")
                print(f"Entries: {num_entries}")

                # Show first few relocations in block
                for entry in reloc_block.entries[:5]:
                    if entry.type != 0:  # Skip padding entries
                        # pefile stores the full RVA (block base + offset)
                        # in entry.rva, so the block base must not be
                        # added a second time.
                        print(f" 0x{entry.rva:08x}: Type {entry.type}")

                if len(reloc_block.entries) > 5:
                    print(f" ... and {len(reloc_block.entries) - 5} more")

                total_relocs += num_entries
                print()

            print(f"Total relocations: {total_relocs}")
    else:
        print("File has no base relocations")

    # Check for dynamic relocations
    if pe.has_dynamic_relocs():
        print("File has dynamic relocations")

import pefile
def analyze_address_space(pe):
    """Print the virtual address space layout of a parsed PE file.

    Args:
        pe: Object exposing ``OPTIONAL_HEADER.ImageBase``,
            ``OPTIONAL_HEADER.SizeOfImage`` and an iterable ``sections``
            whose items carry ``Name`` (bytes), ``VirtualAddress`` and
            ``VirtualSize`` (e.g. a ``pefile.PE`` instance).

    Returns:
        None. The report is written to standard output.
    """
    image_base = pe.OPTIONAL_HEADER.ImageBase
    size_of_image = pe.OPTIONAL_HEADER.SizeOfImage

    print("Address Space Layout:")
    print("-" * 50)
    print(f"ImageBase: 0x{image_base:08x}")
    print(f"Size of Image: 0x{size_of_image:08x}")
    print(f"Address Range: 0x{image_base:08x} - 0x{image_base + size_of_image:08x}")
    print()

    print("Section Virtual Layout:")
    print(f"{'Section':<10} {'VirtAddr':<12} {'VirtSize':<12} {'End Addr':<12}")
    print("-" * 50)

    for section in pe.sections:
        # Section names are raw bytes and need not be valid UTF-8;
        # replace undecodable bytes instead of raising.
        name = section.Name.decode('utf-8', errors='replace').strip('\x00')
        virt_addr = image_base + section.VirtualAddress
        virt_size = section.VirtualSize
        end_addr = virt_addr + virt_size

        # Pad each value to the header's column width so rows line up
        # under their headings.
        start_col = f"0x{virt_addr:08x}"
        size_col = f"0x{virt_size:08x}"
        end_col = f"0x{end_addr:08x}"
        print(f"{name:<10} {start_col:<12} {size_col:<12} {end_col:<12}")
# Usage: open the file with the context manager so it is closed after
# the report is printed.
with pefile.PE('executable.exe') as pe:
    analyze_address_space(pe)

import pefile
# Example: rebase a PE file to a new ImageBase and save the result.
# Load PE file; the context manager closes it even if relocation or
# writing raises.
with pefile.PE('executable.exe') as pe:
    # Check current base address
    original_base = pe.OPTIONAL_HEADER.ImageBase
    print(f"Original ImageBase: 0x{original_base:08x}")

    # Check if relocation is possible
    if pe.has_relocs():
        print("File supports relocation")

        # Relocate to new base address
        new_base = 0x20000000
        print(f"Relocating to: 0x{new_base:08x}")
        pe.relocate_image(new_base)

        # Verify the change
        updated_base = pe.OPTIONAL_HEADER.ImageBase
        print(f"Updated ImageBase: 0x{updated_base:08x}")

        # Save relocated PE
        pe.write('relocated_executable.exe')
        print("Relocated executable saved")
    else:
        print("File does not support relocation (no relocation table)")

import pefile
# Example: translate between virtual addresses and file offsets, using
# the entry point as a round-trip check.
with pefile.PE('executable.exe') as pe:

    def virtual_to_physical(virtual_addr):
        """Convert virtual address to file offset."""
        # Convert VA to RVA
        image_base = pe.OPTIONAL_HEADER.ImageBase
        rva = virtual_addr - image_base
        # Convert RVA to file offset
        return pe.get_offset_from_rva(rva)

    def physical_to_virtual(file_offset):
        """Convert file offset to virtual address, or None if unmapped."""
        # Convert file offset to RVA
        rva = pe.get_rva_from_offset(file_offset)
        if rva is None:
            return None
        # Convert RVA to VA
        return pe.OPTIONAL_HEADER.ImageBase + rva

    # Example translations
    entry_point_va = pe.OPTIONAL_HEADER.ImageBase + pe.OPTIONAL_HEADER.AddressOfEntryPoint
    entry_point_offset = virtual_to_physical(entry_point_va)
    print(f"Entry Point VA: 0x{entry_point_va:08x}")
    print(f"Entry Point File Offset: 0x{entry_point_offset:08x}")

    # Verify round-trip conversion
    back_to_va = physical_to_virtual(entry_point_offset)
    if back_to_va is None:
        # physical_to_virtual returns None when the offset has no RVA;
        # formatting None with :08x would raise TypeError.
        print("Round-trip VA: <file offset does not map to an RVA>")
    else:
        print(f"Round-trip VA: 0x{back_to_va:08x}")

import pefile
def analyze_memory_protection(pe):
    """Print the memory protection and content flags of each section.

    Args:
        pe: Object exposing an iterable ``sections`` whose items carry
            ``Name`` (bytes) and ``Characteristics`` (int bit mask),
            e.g. a ``pefile.PE`` instance.

    Returns:
        None. The report is written to standard output.
    """
    print("Memory Protection Analysis:")
    print("-" * 60)
    print(f"{'Section':<10} {'Protection':<15} {'Characteristics':<30}")
    print("-" * 60)

    for section in pe.sections:
        # Section names are raw bytes and need not be valid UTF-8;
        # replace undecodable bytes instead of raising.
        name = section.Name.decode('utf-8', errors='replace').strip('\x00')
        chars = section.Characteristics

        # Determine protection flags
        protection = []
        if chars & 0x20000000:  # IMAGE_SCN_MEM_EXECUTE
            protection.append('X')
        if chars & 0x40000000:  # IMAGE_SCN_MEM_READ
            protection.append('R')
        if chars & 0x80000000:  # IMAGE_SCN_MEM_WRITE
            protection.append('W')
        prot_str = ''.join(protection) if protection else 'None'

        # Decode other characteristics
        char_flags = []
        if chars & 0x00000020:  # IMAGE_SCN_CNT_CODE
            char_flags.append('CODE')
        if chars & 0x00000040:  # IMAGE_SCN_CNT_INITIALIZED_DATA
            char_flags.append('INIT_DATA')
        if chars & 0x00000080:  # IMAGE_SCN_CNT_UNINITIALIZED_DATA
            char_flags.append('UNINIT_DATA')
        if chars & 0x02000000:  # IMAGE_SCN_MEM_DISCARDABLE
            char_flags.append('DISCARDABLE')
        if chars & 0x10000000:  # IMAGE_SCN_MEM_SHARED
            char_flags.append('SHARED')
        char_str = ', '.join(char_flags[:3])  # Limit for display

        print(f"{name:<10} {prot_str:<15} {char_str:<30}")
# Usage
with pefile.PE('executable.exe') as pe:
    analyze_memory_protection(pe)

Install with Tessl CLI:

    npx tessl i tessl/pypi-pefile