0
# Wheel Content Analysis
1
2
Core functionality for parsing wheel files and extracting their internal structure. Provides a detailed representation of the files, directories, metadata, and distribution information contained within Python wheel packages.
3
4
## Capabilities
5
6
### Wheel Content Parsing
7
8
Parses wheel files to extract complete file structure, metadata, and distribution information. Handles both pure Python and platform-specific wheels.
9
10
```python { .api }
11
class WheelContents:
12
"""Representation of the contents of a wheel"""
13
14
def __init__(self, dist_info_dir: str, data_dir: str = "",
15
root_is_purelib: bool = True):
16
"""
17
Initialize wheel contents representation.
18
19
Parameters:
20
- dist_info_dir: Name of the wheel's .dist-info directory
21
- data_dir: Name of the wheel's .data directory, if any
22
- root_is_purelib: Whether root contains purelib or platlib files
23
"""
24
25
@classmethod
26
def from_wheel(cls, path: str | os.PathLike) -> 'WheelContents':
27
"""
28
Create WheelContents from a wheel file.
29
30
Parameters:
31
- path: Path to the wheel file
32
33
Returns:
34
WheelContents instance
35
36
Raises:
37
- WheelValidationError: If wheel is invalid or malformed
38
- InvalidFilenameError: If wheel filename is invalid
39
"""
40
41
@property
42
def purelib_tree(self) -> Directory:
43
"""
44
Get the purelib section of the wheel's file tree.
45
46
Returns:
47
Directory tree for pure Python files
48
"""
49
50
@property
51
def platlib_tree(self) -> Directory:
52
"""
53
Get the platlib section of the wheel's file tree.
54
55
Returns:
56
Directory tree for platform-specific files
57
"""
58
```
59
60
### File Tree Representation
61
62
Hierarchical representation of files and directories within wheels, providing navigation and analysis capabilities.
63
64
```python { .api }
65
class File:
66
"""Representation of a file in a file tree"""
67
68
def __init__(self, parts: tuple[str, ...], size: int | None = None,
69
hashsum: str | None = None):
70
"""
71
Initialize file representation.
72
73
Parameters:
74
- parts: Components of the file's path within the tree
75
- size: File size in bytes, or None if unknown
76
- hashsum: Hash of file contents in {alg}={digest} format, or None if unknown
77
"""
78
79
@classmethod
80
def from_record_row(cls, row: list[str]) -> 'File':
81
"""
82
Create File from RECORD file row.
83
84
Parameters:
85
- row: List of fields from wheel RECORD file
86
87
Returns:
88
File instance
89
90
Raises:
91
- WheelValidationError: If RECORD entry is invalid
92
"""
93
94
@property
95
def path(self) -> str:
96
"""Get the full path of the file"""
97
98
@property
99
def signature(self) -> tuple[int | None, str | None]:
100
"""Get the file's size and hash signature"""
101
102
@property
103
def extension(self) -> str:
104
"""Get the file's filename extension"""
105
106
def has_module_ext(self) -> bool:
107
"""Check if file has Python module extension (.py, .so, .pyd, etc.)"""
108
109
def is_valid_module_path(self) -> bool:
110
"""Check if file path is a valid importable Python module path"""
111
112
class Directory:
113
"""Representation of a directory in a file tree"""
114
115
def __init__(self, path: str | None = None):
116
"""
117
Initialize directory representation.
118
119
Parameters:
120
- path: Directory path, or None for root
121
"""
122
123
@classmethod
124
def from_local_tree(cls, root: Path, exclude: list[str] | None = None,
125
include_root: bool = True) -> 'Directory':
126
"""
127
Build directory tree mirroring local filesystem structure.
128
129
Parameters:
130
- root: Root directory path to mirror
131
- exclude: Glob patterns to exclude during traversal
132
- include_root: Whether to include root directory name in paths
133
134
Returns:
135
Directory representing the tree structure
136
"""
137
138
@property
139
def files(self) -> dict[str, File]:
140
"""Files in the directory, as mapping from basenames to File objects"""
141
142
@property
143
def subdirectories(self) -> dict[str, 'Directory']:
144
"""Subdirectories in the directory, as mapping from basenames to Directory objects"""
145
146
def add_entry(self, entry: File | 'Directory') -> None:
147
"""
148
Insert a File or Directory into the tree, creating intermediate dirs as needed.
149
150
Parameters:
151
- entry: File or Directory to add to the tree
152
"""
153
154
def all_files(self) -> Iterator[File]:
155
"""Return generator of all File objects in the tree rooted at this directory"""
156
157
def __getitem__(self, name: str) -> File | 'Directory':
158
"""Retrieve an entry from the directory by basename"""
159
160
def __contains__(self, name: str) -> bool:
161
"""Check if directory contains an entry with given basename"""
162
```
163
164
### Usage Examples
165
166
```python
167
from pathlib import Path
168
from check_wheel_contents.contents import WheelContents
from check_wheel_contents.filetree import Directory
169
170
# Load a wheel
171
wheel_path = Path("dist/mypackage-1.0.0-py3-none-any.whl")
172
contents = WheelContents.from_wheel(wheel_path)
173
174
# Examine wheel structure
175
print(f"Dist-info directory: {contents.dist_info_dir}")
176
print(f"Data directory: {contents.data_dir}")
177
print(f"Root is purelib: {contents.root_is_purelib}")
178
179
# Navigate file tree
180
purelib = contents.purelib_tree
181
print(f"Purelib subdirectories: {list(purelib.subdirectories.keys())}")
182
print(f"Purelib files: {list(purelib.files.keys())}")
183
184
# Find specific files
185
if "mypackage" in purelib:
186
mypackage_dir = purelib["mypackage"]
187
if isinstance(mypackage_dir, Directory) and "__init__.py" in mypackage_dir:
188
init_file = mypackage_dir["__init__.py"]
189
print(f"Package __init__.py size: {init_file.size}")
190
191
# Iterate through all files in tree
192
for file in purelib.all_files():
193
print(f"File: {file.path} ({file.size} bytes)")
194
195
# Check for duplicates: by_signature groups File objects by their (size, hashsum) signature
196
duplicates = {}
197
for signature, files in contents.by_signature.items():
198
if len(files) > 1:
199
duplicates[signature] = [f.path for f in files]
200
201
if duplicates:
202
print("Duplicate files found:")
203
for sig, paths in duplicates.items():
204
print(f" {sig}: {paths}")
205
```