or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration.md, index.md, validation.md, wheel-analysis.md

docs/wheel-analysis.md

0

# Wheel Content Analysis

1

2

Core functionality for parsing wheel files and extracting their internal structure. Provides detailed representation of files, directories, metadata, and distribution information contained within Python wheel packages.

3

4

## Capabilities

5

6

### Wheel Content Parsing

7

8

Parses wheel files to extract complete file structure, metadata, and distribution information. Handles both pure Python and platform-specific wheels.

9

10

```python { .api }

11

class WheelContents:

12

"""Representation of the contents of a wheel"""

13

14

def __init__(self, dist_info_dir: str, data_dir: str = "",

15

root_is_purelib: bool = True):

16

"""

17

Initialize wheel contents representation.

18

19

Parameters:

20

- dist_info_dir: Name of the wheel's .dist-info directory

21

- data_dir: Name of the wheel's .data directory, if any

22

- root_is_purelib: True if the files at the wheel's root belong to purelib, False if they belong to platlib

23

"""

24

25

@classmethod

26

def from_wheel(cls, path: str | os.PathLike) -> 'WheelContents':

27

"""

28

Create WheelContents from a wheel file.

29

30

Parameters:

31

- path: Path to the wheel file

32

33

Returns:

34

WheelContents instance

35

36

Raises:

37

- WheelValidationError: If wheel is invalid or malformed

38

- InvalidFilenameError: If wheel filename is invalid

39

"""

40

41

@property

42

def purelib_tree(self) -> Directory:

43

"""

44

Get the purelib section of the wheel's file tree.

45

46

Returns:

47

Directory tree for pure Python files

48

"""

49

50

@property

51

def platlib_tree(self) -> Directory:

52

"""

53

Get the platlib section of the wheel's file tree.

54

55

Returns:

56

Directory tree for platform-specific files

57

"""

58

```

59

60

### File Tree Representation

61

62

Hierarchical representation of files and directories within wheels, providing navigation and analysis capabilities.

63

64

```python { .api }

65

class File:

66

"""Representation of a file in a file tree"""

67

68

def __init__(self, parts: tuple[str, ...], size: int | None = None,

69

hashsum: str | None = None):

70

"""

71

Initialize file representation.

72

73

Parameters:

74

- parts: Components of the file's path within the tree

75

- size: File size in bytes, or None if unknown

76

- hashsum: Hash of file contents in {alg}={digest} format

77

"""

78

79

@classmethod

80

def from_record_row(cls, row: list[str]) -> 'File':

81

"""

82

Create File from RECORD file row.

83

84

Parameters:

85

- row: List of fields from wheel RECORD file

86

87

Returns:

88

File instance

89

90

Raises:

91

- WheelValidationError: If RECORD entry is invalid

92

"""

93

94

@property

95

def path(self) -> str:

96

"""Get the full path of the file"""

97

98

@property

99

def signature(self) -> tuple[int | None, str | None]:

100

"""Get the file's size and hash signature"""

101

102

@property

103

def extension(self) -> str:

104

"""Get the file's filename extension"""

105

106

def has_module_ext(self) -> bool:

107

"""Check if file has Python module extension (.py, .so, .pyd, etc.)"""

108

109

def is_valid_module_path(self) -> bool:

110

"""Check if file path is a valid importable Python module path"""

111

112

class Directory:

113

"""Representation of a directory in a file tree"""

114

115

def __init__(self, path: str | None = None):

116

"""

117

Initialize directory representation.

118

119

Parameters:

120

- path: Directory path, or None for root

121

"""

122

123

@classmethod

124

def from_local_tree(cls, root: Path, exclude: list[str] | None = None,

125

include_root: bool = True) -> 'Directory':

126

"""

127

Build directory tree mirroring local filesystem structure.

128

129

Parameters:

130

- root: Root directory path to mirror

131

- exclude: Glob patterns to exclude during traversal

132

- include_root: Whether to include root directory name in paths

133

134

Returns:

135

Directory representing the tree structure

136

"""

137

138

@property

139

def files(self) -> dict[str, File]:

140

"""Files in the directory, as mapping from basenames to File objects"""

141

142

@property

143

def subdirectories(self) -> dict[str, 'Directory']:

144

"""Subdirectories in the directory, as mapping from basenames to Directory objects"""

145

146

def add_entry(self, entry: File | 'Directory') -> None:

147

"""

148

Insert a File or Directory into the tree, creating intermediate dirs as needed.

149

150

Parameters:

151

- entry: File or Directory to add to the tree

152

"""

153

154

def all_files(self) -> Iterator[File]:

155

"""Return generator of all File objects in the tree rooted at this directory"""

156

157

def __getitem__(self, name: str) -> File | 'Directory':

158

"""Retrieve an entry from the directory by basename"""

159

160

def __contains__(self, name: str) -> bool:

161

"""Check if directory contains an entry with given basename"""

162

```

163

164

### Usage Examples

165

166

```python

167

from pathlib import Path

168

from check_wheel_contents.contents import WheelContents
from check_wheel_contents.filetree import Directory

169

170

# Load a wheel

171

wheel_path = Path("dist/mypackage-1.0.0-py3-none-any.whl")

172

contents = WheelContents.from_wheel(wheel_path)

173

174

# Examine wheel structure

175

print(f"Dist-info directory: {contents.dist_info_dir}")

176

print(f"Data directory: {contents.data_dir}")

177

print(f"Root is purelib: {contents.root_is_purelib}")

178

179

# Navigate file tree

180

purelib = contents.purelib_tree

181

print(f"Purelib subdirectories: {list(purelib.subdirectories.keys())}")

182

print(f"Purelib files: {list(purelib.files.keys())}")

183

184

# Find specific files

185

if "mypackage" in purelib:

186

mypackage_dir = purelib["mypackage"]

187

if isinstance(mypackage_dir, Directory) and "__init__.py" in mypackage_dir:

188

init_file = mypackage_dir["__init__.py"]

189

print(f"Package __init__.py size: {init_file.size}")

190

191

# Iterate through all files in tree

192

for file in purelib.all_files():

193

print(f"File: {file.path} ({file.size} bytes)")

194

195

# Check for duplicates

196

duplicates = {}

197

for signature, files in contents.by_signature.items():

198

if len(files) > 1:

199

duplicates[signature] = [f.path for f in files]

200

201

if duplicates:

202

print("Duplicate files found:")

203

for sig, paths in duplicates.items():

204

print(f" {sig}: {paths}")

205

```