Pure Python implementation of the Git version control system providing comprehensive access to Git repositories without requiring the Git command-line tool
—
Comprehensive pack file handling for Git's compressed object storage format with indexing, streaming, delta compression, and advanced pack management capabilities including multi-version index support and efficient delta chain resolution.
Classes for reading and managing pack index files that provide fast object lookup.
class PackIndex:
"""Abstract base class for pack index files."""
def get_pack_checksum(self) -> bytes:
"""Get the pack file checksum."""
def object_sha1(self, index: int) -> bytes:
"""Get object SHA-1 by index position."""
def object_offset(self, index: int) -> int:
"""Get object offset in pack file by index position."""
def objects_sha1(self) -> Iterator[bytes]:
"""Iterate over all object SHA-1s in the index."""
class PackIndex1(PackIndex):
"""Version 1 pack index format."""
def __init__(self, filename: str, file=None, contents=None): ...
class PackIndex2(PackIndex):
"""Version 2 pack index format (default)."""
def __init__(self, filename: str, file=None, contents=None): ...
class MemoryPackIndex(PackIndex):
"""In-memory pack index implementation."""
def __init__(self, entries: List[Tuple[int, bytes]], pack_checksum: bytes): ...
class FilePackIndex(PackIndex):
"""File-based pack index with automatic format detection."""
def __init__(self, filename: str, file=None): ...
Classes for reading and accessing objects from pack data files.
class PackData:
"""Pack data file reader."""
def __init__(self, filename: str, file=None, size=None): ...
def __getitem__(self, offset: int) -> ShaFile:
"""Get object at specified offset."""
def get_object_header(self, offset: int) -> Tuple[int, int]:
"""Get object type and size at offset."""
def get_object_at(self, offset: int) -> ShaFile:
"""Get complete object at offset."""
def iterobjects(self, get_raw=None) -> Iterator[ShaFile]:
"""Iterate over all objects in pack."""
def sorted_entries(self, progress=None) -> List[Tuple[int, bytes, int]]:
"""Get sorted list of (offset, sha, crc32) entries."""
Classes for streaming pack data and building indexes.
class PackStreamReader:
"""Read objects from a pack stream."""
def __init__(self, read_all, read_some=None, zlib_bufsize=None): ...
def read_objects(self, compute_crc32=False) -> Iterator[Tuple[int, ShaFile]]:
"""Read objects from stream with optional CRC32 computation."""
class PackStreamCopier:
"""Copy pack stream while building index."""
def __init__(self, read_all, read_some, outfile, delta_iter=None): ...
def verify(self) -> None:
"""Verify pack stream integrity."""
class PackInflater:
"""Inflate pack objects from stream."""
def __init__(self, read_all): ...
class PackIndexer:
"""Build pack index from pack stream."""
def __init__(self, f, resolve_ext_ref=None): ...
def __enter__(self): ...
def __exit__(self, exc_type, exc_val, exc_tb): ...
Functions for creating and writing pack files.
def write_pack(filename: str, objects: Iterator[ShaFile],
deltify=None, delta_window_size=None) -> bytes:
"""
Write objects to a pack file.
Args:
filename: Output pack filename
objects: Iterator of objects to pack
deltify: Whether to create deltas
delta_window_size: Delta compression window size
Returns:
Pack file checksum
"""
def write_pack_objects(f, objects: Iterator[ShaFile]) -> bytes:
"""
Write pack objects to file-like object.
Args:
f: File-like object to write to
objects: Iterator of objects to pack
Returns:
Pack file checksum
"""
def pack_objects_to_data(objects: Iterator[ShaFile]) -> bytes:
"""
Pack objects to bytes.
Args:
objects: Iterator of objects to pack
Returns:
Pack data as bytes
"""
def generate_unpacked_objects(object_store, object_ids: List[bytes],
progress=None) -> Iterator[ShaFile]:
"""
Generate objects for packing from object store.
Args:
object_store: Source object store
object_ids: List of object IDs to pack
progress: Optional progress callback
Yields:
Objects ready for packing
"""
Functions for loading and parsing pack files.
def load_pack_index(path: str) -> PackIndex:
"""
Load pack index from file.
Args:
path: Path to .idx file
Returns:
Appropriate PackIndex instance
"""
def read_pack_header(read: Callable[[int], bytes]) -> Tuple[int, int]:
"""
Read pack file header.
Args:
read: Function to read bytes
Returns:
Tuple of (version, num_objects)
"""
class UnresolvedDeltas(Exception):
"""Exception raised when pack contains unresolved delta objects."""
class PackFileDisappeared(Exception):
"""Exception raised when pack file becomes unavailable during operation."""
from dulwich.pack import PackData, load_pack_index
# Load pack and index
pack_data = PackData('objects/pack/pack-abc123.pack')
pack_index = load_pack_index('objects/pack/pack-abc123.idx')
# Iterate through all objects
for obj in pack_data.iterobjects():
print(f"Object {obj.id.hex()}: {obj.type_name}")
# Get specific object by SHA
sha = bytes.fromhex('abc123...')
if sha in pack_index:
offset = pack_index.object_offset(pack_index.object_index(sha))
obj = pack_data[offset]
print(f"Found object: {obj.type_name}")
from dulwich.pack import write_pack
from dulwich.objects import Blob
# Create some objects
objects = []
for i in range(10):
blob = Blob.from_string(f"Content {i}".encode())
objects.append(blob)
# Write to pack file
checksum = write_pack('new-pack.pack', iter(objects))
print(f"Pack created with checksum: {checksum.hex()}")
from dulwich.pack import PackStreamReader
import gzip
# Read pack from compressed stream
with gzip.open('pack-stream.gz', 'rb') as f:
def read_all(size):
return f.read(size)
reader = PackStreamReader(read_all)
# Process objects as they're read
for offset, obj in reader.read_objects():
print(f"Object at {offset}: {obj.type_name} {obj.id.hex()}")
Install with Tessl CLI
npx tessl i tessl/pypi-dulwich