Tool for validating Python wheel contents to detect common packaging errors and ensure proper distribution structure
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Core functionality for parsing wheel files and extracting their internal structure. Provides detailed representation of files, directories, metadata, and distribution information contained within Python wheel packages.
Parses wheel files to extract complete file structure, metadata, and distribution information. Handles both pure Python and platform-specific wheels.
class WheelContents:
"""Representation of the contents of a wheel"""
def __init__(self, dist_info_dir: str, data_dir: str = "",
root_is_purelib: bool = True):
"""
Initialize wheel contents representation.
Parameters:
- dist_info_dir: Name of the wheel's .dist-info directory
- data_dir: Name of the wheel's .data directory, if any
- root_is_purelib: Whether root contains purelib or platlib files
"""
@classmethod
def from_wheel(cls, path: str | os.PathLike) -> 'WheelContents':
"""
Create WheelContents from a wheel file.
Parameters:
- path: Path to the wheel file
Returns:
WheelContents instance
Raises:
- WheelValidationError: If wheel is invalid or malformed
- InvalidFilenameError: If wheel filename is invalid
"""
@property
def purelib_tree(self) -> Directory:
"""
Get the purelib section of the wheel's file tree.
Returns:
Directory tree for pure Python files
"""
@property
def platlib_tree(self) -> Directory:
"""
Get the platlib section of the wheel's file tree.
Returns:
Directory tree for platform-specific files
"""Hierarchical representation of files and directories within wheels, providing navigation and analysis capabilities.
class File:
"""Representation of a file in a file tree"""
def __init__(self, parts: tuple[str, ...], size: int | None = None,
hashsum: str | None = None):
"""
Initialize file representation.
Parameters:
- parts: Components of the file's path within the tree
- size: File size in bytes, or None if unknown
- hashsum: Hash of file contents in {alg}={digest} format
"""
@classmethod
def from_record_row(cls, row: list[str]) -> 'File':
"""
Create File from RECORD file row.
Parameters:
- row: List of fields from wheel RECORD file
Returns:
File instance
Raises:
- WheelValidationError: If RECORD entry is invalid
"""
@property
def path(self) -> str:
"""Get the full path of the file"""
@property
def signature(self) -> tuple[int | None, str | None]:
"""Get the file's size and hash signature"""
@property
def extension(self) -> str:
"""Get the file's filename extension"""
def has_module_ext(self) -> bool:
"""Check if file has Python module extension (.py, .so, .pyd, etc.)"""
def is_valid_module_path(self) -> bool:
"""Check if file path is a valid importable Python module path"""
class Directory:
"""Representation of a directory in a file tree"""
def __init__(self, path: str | None = None):
"""
Initialize directory representation.
Parameters:
- path: Directory path, or None for root
"""
@classmethod
def from_local_tree(cls, root: Path, exclude: list[str] | None = None,
include_root: bool = True) -> 'Directory':
"""
Build directory tree mirroring local filesystem structure.
Parameters:
- root: Root directory path to mirror
- exclude: Glob patterns to exclude during traversal
- include_root: Whether to include root directory name in paths
Returns:
Directory representing the tree structure
"""
@property
def files(self) -> dict[str, File]:
"""Files in the directory, as mapping from basenames to File objects"""
@property
def subdirectories(self) -> dict[str, 'Directory']:
"""Subdirectories in the directory, as mapping from basenames to Directory objects"""
def add_entry(self, entry: File | 'Directory') -> None:
"""
Insert a File or Directory into the tree, creating intermediate dirs as needed.
Parameters:
- entry: File or Directory to add to the tree
"""
def all_files(self) -> Iterator[File]:
"""Return generator of all File objects in the tree rooted at this directory"""
def __getitem__(self, name: str) -> File | 'Directory':
"""Retrieve an entry from the directory by basename"""
def __contains__(self, name: str) -> bool:
"""Check if directory contains an entry with given basename"""from pathlib import Path
from check_wheel_contents import WheelContents

# Usage example: load a wheel, inspect its layout, and report files that
# share the same (size, hash) signature.

# Load a wheel
wheel_path = Path("dist/mypackage-1.0.0-py3-none-any.whl")
contents = WheelContents.from_wheel(wheel_path)

# Examine wheel structure
print(f"Dist-info directory: {contents.dist_info_dir}")
print(f"Data directory: {contents.data_dir}")
print(f"Root is purelib: {contents.root_is_purelib}")

# Navigate file tree
purelib = contents.purelib_tree
print(f"Purelib subdirectories: {list(purelib.subdirectories.keys())}")
print(f"Purelib files: {list(purelib.files.keys())}")

# Find specific files
# Go through the `subdirectories` / `files` mappings so the entry kinds are
# known up front; this avoids an isinstance() check against `Directory`,
# which this script never imports.
mypackage_dir = purelib.subdirectories.get("mypackage")
if mypackage_dir is not None:
    init_file = mypackage_dir.files.get("__init__.py")
    if init_file is not None:
        print(f"Package __init__.py size: {init_file.size}")

# Iterate through all files in tree
for file in purelib.all_files():
    print(f"File: {file.path} ({file.size} bytes)")

# Check for duplicates
# NOTE(review): `by_signature` is not part of the WheelContents signature
# listing in this document -- confirm it exists in the installed version.
duplicates = {
    signature: [f.path for f in files]
    for signature, files in contents.by_signature.items()
    if len(files) > 1
}
if duplicates:
    print("Duplicate files found:")
    for sig, paths in duplicates.items():
        print(f" {sig}: {paths}")

# Install with Tessl CLI
#   npx tessl i tessl/pypi-check-wheel-contents