Python PE parsing module for analyzing Portable Executable (PE) files with comprehensive header, section, and directory entry support
npx @tessl/cli install tessl/pypi-pefile@2024.8.0

A comprehensive Python module for parsing and working with Portable Executable (PE) files. Pefile provides access to virtually all information contained in PE file headers, sections, and data directories, enabling detailed analysis and modification of Windows executable files, DLLs, and drivers.
pip install pefile

import pefile

For utilities and packer detection:
import peutils

For ordinal lookups:
import ordlookup

import pefile
# Load PE file from path
pe = pefile.PE('path/to/executable.exe')
# Or load from raw data
with open('path/to/executable.exe', 'rb') as f:
    pe = pefile.PE(data=f.read())
# Access basic information
print(f"Machine type: {pe.FILE_HEADER.Machine}")
print(f"Number of sections: {pe.FILE_HEADER.NumberOfSections}")
print(f"Is DLL: {pe.is_dll()}")
print(f"Is executable: {pe.is_exe()}")
# Access sections
for section in pe.sections:
    print(f"Section: {section.Name.decode('utf-8').strip()}")
    print(f"Virtual Address: 0x{section.VirtualAddress:08x}")
    print(f"Size: {section.SizeOfRawData}")
# Access imports (if present)
if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
    for entry in pe.DIRECTORY_ENTRY_IMPORT:
        print(f"DLL: {entry.dll.decode('utf-8')}")
        for imp in entry.imports:
            if imp.name:
                print(f" Function: {imp.name.decode('utf-8')}")
# Clean up resources
pe.close()

The pefile module is built around a hierarchical structure that mirrors the PE file format:
The module handles corrupted and malformed PE files gracefully, making it suitable for malware analysis and security research.
Core functionality for loading, parsing, and accessing PE file structures including headers, sections, and data directories.
class PE:
    def __init__(self, name=None, data=None, fast_load=None, max_symbol_exports=8192, max_repeated_symbol=120): ...
    def __enter__(self): ...
    def __exit__(self, type, value, traceback): ...
    def close(self): ...
    def write(self, filename=None): ...
    def full_load(self): ...

Methods for reading and writing data within PE files, including address translation between file offsets and relative virtual addresses (RVAs).
def get_data(self, rva=0, length=None): ...
def get_string_at_rva(self, rva, max_length=1048576): ...
def get_string_u_at_rva(self, rva, max_length=65536, encoding=None): ...
def get_dword_at_rva(self, rva): ...
def get_word_at_rva(self, rva): ...
def get_qword_at_rva(self, rva): ...
def set_dword_at_rva(self, rva, dword): ...
def set_word_at_rva(self, rva, word): ...
def set_qword_at_rva(self, rva, qword): ...
def set_bytes_at_rva(self, rva, data): ...
def get_offset_from_rva(self, rva): ...
def get_rva_from_offset(self, offset): ...
def get_physical_by_rva(self, rva): ...

Functionality for analyzing import and export tables, including generation of import/export hashes for malware analysis.
def get_imphash(self): ...
def get_exphash(self): ...
def parse_import_directory(self, rva, size, dllnames_only=False): ...
def parse_export_directory(self, rva, size, forwarded_only=False): ...

Methods for working with PE sections, including accessing section data and metadata.
def get_section_by_rva(self, rva): ...
def get_section_by_offset(self, offset): ...
def merge_modified_section_data(self): ...

Functions for memory mapping PE files and handling base relocations for different load addresses.
def get_memory_mapped_image(self, max_virtual_address=268435456, ImageBase=None): ...
def relocate_image(self, new_ImageBase): ...
def has_relocs(self): ...
def has_dynamic_relocs(self): ...
def get_overlay(self): ...
def get_overlay_data_start_offset(self): ...
def trim(self): ...

Access to embedded resources including strings, icons, version information, and other resource types.
def get_resources_strings(self): ...
def parse_resources_directory(self, rva, size=0, base_rva=None, level=0, dirs=None): ...
def parse_version_information(self, version_struct): ...

Access to debug directories and related debugging information embedded in PE files.
def parse_debug_directory(self, rva, size): ...

Checksum verification and various hash calculation methods for file integrity and identification.
def verify_checksum(self): ...
def generate_checksum(self): ...
def get_rich_header_hash(self, algorithm="md5"): ...
def is_exe(self): ...
def is_dll(self): ...
def is_driver(self): ...

Utilities for detecting packed executables and identifying packers/compilers using signature databases.
class SignatureDatabase:
    def __init__(self, filename=None, data=None): ...
    def match(self, pe, ep_only=True, section_start_only=False): ...
    def match_all(self, pe, ep_only=True, section_start_only=False): ...
    def load(self, filename=None, data=None): ...
    def generate_ep_signature(self, pe, name, sig_length=512): ...
def is_probably_packed(pe): ...
def is_suspicious(pe): ...
def is_valid(pe): ...

Database of ordinal to symbol name mappings for common Windows DLLs.
def ordLookup(libname, ord_val, make_name=False): ...
def formatOrdString(ord_val): ...

class PE:
    """Main PE file parser class."""
    DOS_HEADER: Structure
    NT_HEADERS: Structure
    FILE_HEADER: Structure
    OPTIONAL_HEADER: Structure
    sections: list
class Structure:
    """Base class for binary data structures."""
    def __init__(self, format, name=None, file_offset=None): ...
    def get_field_absolute_offset(self, field_name): ...
    def get_field_relative_offset(self, field_name): ...
    def sizeof(self): ...
    def dump(self, indentation=0): ...
    def dump_dict(self): ...

class SectionStructure(Structure):
    """Section structure with data access methods."""
    def get_data(self, start=None, length=None, ignore_padding=False): ...
    def get_entropy(self): ...
    def get_hash_md5(self): ...
    def get_hash_sha1(self): ...
    def get_hash_sha256(self): ...
    def get_hash_sha512(self): ...
    def contains_rva(self, rva): ...
    def contains_offset(self, offset): ...
class ImportDescData:
    """Import descriptor data container."""
    struct: Structure
    imports: list
    dll: bytes

class ImportData:
    """Individual import data container."""
    struct: Structure
    name: bytes
    import_by_ordinal: bool
    ordinal: int
    bound: int
    address: int
    hint: int

class ExportDirData:
    """Export directory data container."""
    struct: Structure
    symbols: list

class ExportData:
    """Individual export data container."""
    struct: Structure
    name: bytes
    ordinal: int
    address: int
    forwarder: bytes
class ResourceDirData:
    """Resource directory data container."""
    struct: Structure
    entries: list

class DebugData:
    """Debug directory data container."""
    struct: Structure
    entry: Structure

class BaseRelocationData:
    """Base relocation data container."""
    struct: Structure
    entries: list

class RelocationData:
    """Individual relocation data container."""
    struct: Structure
    type: int
    base_rva: int
    rva: int

class TlsData:
    """TLS directory data container."""
    struct: Structure

class BoundImportDescData:
    """Bound import descriptor data container."""
    struct: Structure
    entries: list

class LoadConfigData:
    """Load config data container."""
    struct: Structure
class SignatureDatabase:
    """PEiD signature database for packer detection."""
    signature_tree_eponly_true: dict
    signature_tree_eponly_false: dict
    signature_tree_section_start: dict
    signature_count_eponly_true: int
    signature_count_eponly_false: int
    signature_count_section_start: int
    max_depth: int

class PEFormatError(Exception):
    """Exception raised for PE format errors."""
    pass