Python PE parsing module for analyzing Portable Executable (PE) files with comprehensive header, section, and directory entry support
—
Checksum verification and various hash calculation methods for file integrity and identification. These functions provide cryptographic and structural hashes for PE files.
Verify and generate PE file checksums for integrity validation.
def verify_checksum(self):
    """
    Verify PE file checksum against calculated checksum.

    Returns:
        bool: True if checksum is valid, False otherwise

    Note:
        Compares the checksum in the optional header with the
        calculated checksum of the entire file.
    """
def generate_checksum(self):
    """
    Calculate PE file checksum.

    Returns:
        int: Calculated checksum value

    Note:
        Uses the standard PE checksum algorithm which sums
        all words in the file while excluding the checksum field.
    """

Generate hash of Rich header for compiler identification.
def get_rich_header_hash(self, algorithm="md5"):
    """
    Get hash of Rich header data.

    Args:
        algorithm (str): Hash algorithm to use ("md5", "sha1", "sha256")

    Returns:
        str: Hex-encoded hash of Rich header, or None if no Rich header

    Note:
        Rich header contains compiler and linker information.
        Hash can be used to identify build toolchain.
    """

import pefile
# Example: compare the checksum stored in a PE file with a freshly
# calculated one.
with pefile.PE('executable.exe') as pe:
    # Get the checksum stored in the optional header.
    stored_checksum = pe.OPTIONAL_HEADER.CheckSum
    print(f"Stored checksum: 0x{stored_checksum:08x}")

    # Calculate the actual checksum from the file contents.
    calculated_checksum = pe.generate_checksum()
    print(f"Calculated checksum: 0x{calculated_checksum:08x}")

    # Verify the stored value against the calculated one.
    is_valid = pe.verify_checksum()
    print(f"Checksum valid: {is_valid}")

    if not is_valid:
        print("Warning: File checksum is invalid!")
        print("This could indicate file corruption or modification.")


# Start of the next example (previously fused onto the line above).
import pefile
# Example: inspect the Rich header and hash it with several algorithms.
with pefile.PE('executable.exe') as pe:
    # Test the attribute's value, not just its existence: the attribute can
    # be present but set to None when the file carries no Rich header, in
    # which case a plain hasattr() check would never reach the else branch.
    rich_header = getattr(pe, 'RICH_HEADER', None)

    if rich_header is not None:
        print("Rich Header Analysis:")
        print("-" * 30)

        # Display Rich header information.
        if hasattr(rich_header, 'checksum'):
            print(f"Checksum: 0x{rich_header.checksum:08x}")

        if hasattr(rich_header, 'values'):
            print(f"Number of entries: {len(rich_header.values)}")
            print("\nCompiler/Tool Information:")
            for i, entry in enumerate(rich_header.values):
                # Entries lacking id/version/times attributes are skipped.
                # NOTE(review): confirm the element type of `values` against
                # the installed pefile version.
                if hasattr(entry, 'id') and hasattr(entry, 'version') and hasattr(entry, 'times'):
                    print(f" Entry {i}: ID=0x{entry.id:04x}, Version={entry.version}, Count={entry.times}")

        # Generate Rich header hashes.
        hash_algorithms = ['md5', 'sha1', 'sha256']
        print("\nRich Header Hashes:")
        for algorithm in hash_algorithms:
            # Best-effort per algorithm: report the failure and carry on
            # with the remaining algorithms.
            try:
                hash_value = pe.get_rich_header_hash(algorithm)
                if hash_value:
                    print(f" {algorithm.upper()}: {hash_value}")
                else:
                    print(f" {algorithm.upper()}: Not available")
            except Exception as e:
                print(f" {algorithm.upper()}: Error - {e}")
    else:
        print("No Rich header found")


# Start of the next example (previously fused onto the line above).
import pefile
import hashlib
def comprehensive_hash_analysis(filename):
    """Perform comprehensive hash analysis of PE file.

    Prints the MD5/SHA1/SHA256 digests of the raw file, the stored and
    calculated PE checksums, the import, export and Rich header hashes
    (each only when non-empty), and a SHA256 digest per section.

    Args:
        filename: Path of the PE file to analyse.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    print(f"Hash Analysis: {filename}")
    print("=" * 50)

    # Read the file once; the same bytes feed both the whole-file digests
    # and the PE parser, so every reported value describes identical content.
    with open(filename, 'rb') as f:
        file_data = f.read()

    # Whole-file hashes.
    print("File Hashes:")
    print("-" * 20)
    print(f"MD5: {hashlib.md5(file_data).hexdigest()}")
    print(f"SHA1: {hashlib.sha1(file_data).hexdigest()}")
    print(f"SHA256: {hashlib.sha256(file_data).hexdigest()}")

    # PE-specific hashes.
    with pefile.PE(data=file_data) as pe:
        print(f"\nPE Checksum: 0x{pe.OPTIONAL_HEADER.CheckSum:08x}")
        print(f"Calculated Checksum: 0x{pe.generate_checksum():08x}")
        print(f"Checksum Valid: {pe.verify_checksum()}")

        # Import/Export hashes.
        imphash = pe.get_imphash()
        if imphash:
            print(f"Import Hash: {imphash}")

        exphash = pe.get_exphash()
        if exphash:
            print(f"Export Hash: {exphash}")

        # Rich header hash.
        rich_hash = pe.get_rich_header_hash()
        if rich_hash:
            print(f"Rich Header Hash: {rich_hash}")

        # Section hashes.
        print("\nSection Hashes:")
        print("-" * 20)
        for section in pe.sections:
            # Section names are raw bytes and need not be valid UTF-8;
            # substitute undecodable bytes instead of aborting the report.
            name = section.Name.decode('utf-8', errors='replace').strip('\x00')
            sha256 = section.get_hash_sha256()
            print(f"{name:<10}: {sha256}")
# Usage
comprehensive_hash_analysis('executable.exe')


# Start of the next example (previously fused onto the line above).
import pefile
from collections import defaultdict
def analyze_malware_hashes(file_list):
    """Group files by similar hash characteristics.

    Each file is parsed with pefile and keyed by its import hash and Rich
    header hash. Per-file hashes are printed as they are computed, followed
    by every group that contains more than one file.

    Args:
        file_list: Iterable of paths to PE files.

    Returns:
        dict: Maps ``(import_hash, rich_header_hash)`` tuples to the list of
        filenames sharing them. A missing hash is represented by the string
        ``'None'``. Files that failed to parse are not included.
    """
    # Keyed by an (import hash, Rich hash) tuple so the two parts never have
    # to be joined into a string and split apart again.
    hash_groups = defaultdict(list)

    print("Malware Hash Analysis:")
    print("=" * 40)

    for filename in file_list:
        # Best-effort batch processing: one unreadable or malformed sample
        # is reported and skipped rather than aborting the whole run.
        try:
            with pefile.PE(filename) as pe:
                imphash = pe.get_imphash()
                rich_hash = pe.get_rich_header_hash()
        except Exception as e:
            print(f"Error analyzing {filename}: {e}")
            continue

        import_label = imphash or 'None'
        rich_label = rich_hash or 'None'
        # NOTE(review): files lacking both hashes all share the key
        # ('None', 'None') and will be listed together as one family.
        hash_groups[(import_label, rich_label)].append(filename)

        print(f"\n{filename}:")
        print(f" Import Hash: {import_label}")
        print(f" Rich Header Hash: {rich_label}")

    # Display potential families.
    print("\n\nPotential Malware Families:")
    print("-" * 30)

    for (import_label, rich_label), files in hash_groups.items():
        if len(files) > 1:  # Only show groups with multiple files
            print(f"\nFamily (Import: {import_label}, Rich: {rich_label}):")
            for filename in files:
                print(f" - {filename}")

    return dict(hash_groups)
# Usage example
malware_files = [
'sample1.exe',
'sample2.exe',
'sample3.exe'
]
analyze_malware_hashes(malware_files)

Install with Tessl CLI

npx tessl i tessl/pypi-pefile