# Pack Files

`dulwich.pack` provides comprehensive handling of pack files, Git's compressed object storage format. It covers indexing, streaming, and delta compression, along with advanced pack management capabilities such as multi-version index support and efficient delta chain resolution.

## Capabilities

### Pack Index Classes

Classes for reading and managing pack index files, which provide fast object lookup.

```python { .api }
class PackIndex:
    """Abstract base class for pack index files."""

    def get_pack_checksum(self) -> bytes:
        """Get the pack file checksum."""

    def object_sha1(self, index: int) -> bytes:
        """Get object SHA-1 by index position."""

    def object_offset(self, index: int) -> int:
        """Get object offset in pack file by index position."""

    def objects_sha1(self) -> Iterator[bytes]:
        """Iterate over all object SHA-1s in the index."""

class PackIndex1(PackIndex):
    """Version 1 pack index format."""

    def __init__(self, filename: str, file=None, contents=None): ...

class PackIndex2(PackIndex):
    """Version 2 pack index format (default)."""

    def __init__(self, filename: str, file=None, contents=None): ...

class MemoryPackIndex(PackIndex):
    """In-memory pack index implementation."""

    def __init__(self, entries: List[Tuple[int, bytes]], pack_checksum: bytes): ...

class FilePackIndex(PackIndex):
    """File-based pack index with automatic format detection."""

    def __init__(self, filename: str, file=None): ...
```

### Pack Data Classes

Classes for reading and accessing objects from pack data files.

```python { .api }
class PackData:
    """Pack data file reader."""

    def __init__(self, filename: str, file=None, size=None): ...

    def __getitem__(self, offset: int) -> ShaFile:
        """Get object at specified offset."""

    def get_object_header(self, offset: int) -> Tuple[int, int]:
        """Get object type and size at offset."""

    def get_object_at(self, offset: int) -> ShaFile:
        """Get complete object at offset."""

    def iterobjects(self, get_raw=None) -> Iterator[ShaFile]:
        """Iterate over all objects in pack."""

    def sorted_entries(self, progress=None) -> List[Tuple[int, bytes, int]]:
        """Get sorted list of (offset, sha, crc32) entries."""
```

### Pack Streaming Classes

Classes for streaming pack data and building indexes.

```python { .api }
class PackStreamReader:
    """Read objects from a pack stream."""

    def __init__(self, read_all, read_some=None, zlib_bufsize=None): ...

    def read_objects(self, compute_crc32=False) -> Iterator[Tuple[int, ShaFile]]:
        """Read objects from stream with optional CRC32 computation."""

class PackStreamCopier:
    """Copy pack stream while building index."""

    def __init__(self, read_all, read_some, outfile, delta_iter=None): ...

    def verify(self) -> None:
        """Verify pack stream integrity."""

class PackInflater:
    """Inflate pack objects from stream."""

    def __init__(self, read_all): ...

class PackIndexer:
    """Build pack index from pack stream."""

    def __init__(self, f, resolve_ext_ref=None): ...

    def __enter__(self): ...
    def __exit__(self, exc_type, exc_val, exc_tb): ...
```

### Pack Creation Functions

Functions for creating and writing pack files.

```python { .api }
def write_pack(filename: str, objects: Iterator[ShaFile],
               deltify=None, delta_window_size=None) -> bytes:
    """
    Write objects to a pack file.

    Args:
        filename: Output pack filename
        objects: Iterator of objects to pack
        deltify: Whether to create deltas
        delta_window_size: Delta compression window size

    Returns:
        Pack file checksum
    """

def write_pack_objects(f, objects: Iterator[ShaFile]) -> bytes:
    """
    Write pack objects to file-like object.

    Args:
        f: File-like object to write to
        objects: Iterator of objects to pack

    Returns:
        Pack file checksum
    """

def pack_objects_to_data(objects: Iterator[ShaFile]) -> bytes:
    """
    Pack objects to bytes.

    Args:
        objects: Iterator of objects to pack

    Returns:
        Pack data as bytes
    """

def generate_unpacked_objects(object_store, object_ids: List[bytes],
                              progress=None) -> Iterator[ShaFile]:
    """
    Generate objects for packing from object store.

    Args:
        object_store: Source object store
        object_ids: List of object IDs to pack
        progress: Optional progress callback

    Yields:
        Objects ready for packing
    """
```

### Pack Loading Functions

Functions for loading and parsing pack files.

```python { .api }
def load_pack_index(path: str) -> PackIndex:
    """
    Load pack index from file.

    Args:
        path: Path to .idx file

    Returns:
        Appropriate PackIndex instance
    """

def read_pack_header(read: Callable[[int], bytes]) -> Tuple[int, int]:
    """
    Read pack file header.

    Args:
        read: Function to read bytes

    Returns:
        Tuple of (version, num_objects)
    """
```

## Exception Classes

```python { .api }
class UnresolvedDeltas(Exception):
    """Exception raised when pack contains unresolved delta objects."""

class PackFileDisappeared(Exception):
    """Exception raised when pack file becomes unavailable during operation."""
```

## Usage Examples

### Reading Pack Files

```python
from dulwich.pack import PackData, load_pack_index

# Load pack and index
pack_data = PackData('objects/pack/pack-abc123.pack')
pack_index = load_pack_index('objects/pack/pack-abc123.idx')

# Iterate through all objects
for obj in pack_data.iterobjects():
    print(f"Object {obj.id.hex()}: {obj.type_name}")

# Get specific object by SHA
# (placeholder value: substitute the full 40-character hex SHA-1 you are looking for)
sha = bytes.fromhex('0123456789abcdef0123456789abcdef01234567')
if sha in pack_index:
    offset = pack_index.object_offset(pack_index.object_index(sha))
    obj = pack_data[offset]
    print(f"Found object: {obj.type_name}")
```

### Creating Pack Files

```python
from dulwich.pack import write_pack
from dulwich.objects import Blob

# Create some objects
objects = []
for i in range(10):
    blob = Blob.from_string(f"Content {i}".encode())
    objects.append(blob)

# Write to pack file
checksum = write_pack('new-pack.pack', iter(objects))
print(f"Pack created with checksum: {checksum.hex()}")
```

### Streaming Pack Data

```python
from dulwich.pack import PackStreamReader
import gzip

# Read pack from compressed stream
with gzip.open('pack-stream.gz', 'rb') as f:
    def read_all(size):
        return f.read(size)

    reader = PackStreamReader(read_all)

    # Process objects as they're read
    for offset, obj in reader.read_objects():
        print(f"Object at {offset}: {obj.type_name} {obj.id.hex()}")
```