CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-dulwich

Pure Python implementation of the Git version control system providing comprehensive access to Git repositories without requiring the Git command-line tool

Pending
Overview
Eval results
Files

docs/pack-files.md

Pack Files

Comprehensive handling of pack files, Git's compressed object storage format: indexing, streaming, delta compression, and pack management, including support for multiple index versions and efficient delta-chain resolution.

Capabilities

Pack Index Classes

Classes for reading and managing pack index files that provide fast object lookup.

class PackIndex:
    """Abstract base class for pack index files.

    Declares the lookup interface shared by the concrete index classes in
    this section (PackIndex1, PackIndex2, MemoryPackIndex, FilePackIndex).
    Only the signatures are listed here; method bodies are omitted.
    """

    def get_pack_checksum(self) -> bytes:
        """Get the pack file checksum.

        Returns:
            The checksum, as bytes.
        """

    def object_sha1(self, index: int) -> bytes:
        """Get object SHA-1 by index position.

        Args:
            index: Position of the entry within the index.

        Returns:
            The SHA-1 of the object at that position, as bytes.
        """

    def object_offset(self, index: int) -> int:
        """Get object offset in pack file by index position.

        Args:
            index: Position of the entry within the index.

        Returns:
            The offset of that object within the pack file.
        """

    def objects_sha1(self) -> Iterator[bytes]:
        """Iterate over all object SHA-1s in the index.

        Returns:
            An iterator producing one SHA-1 (bytes) per indexed object.
        """

class PackIndex1(PackIndex):
    """Version 1 pack index format.

    Constructor args:
        filename: Name of the index file.
        file: Optional, defaults to None. Presumably an already-open file
            object for the index — TODO confirm against dulwich.
        contents: Optional, defaults to None. Presumably the index data
            already loaded into memory — TODO confirm against dulwich.
    """

    def __init__(self, filename: str, file=None, contents=None): ...

class PackIndex2(PackIndex):
    """Version 2 pack index format (default).

    Constructor args:
        filename: Name of the index file.
        file: Optional, defaults to None. Presumably an already-open file
            object for the index — TODO confirm against dulwich.
        contents: Optional, defaults to None. Presumably the index data
            already loaded into memory — TODO confirm against dulwich.
    """

    def __init__(self, filename: str, file=None, contents=None): ...

class MemoryPackIndex(PackIndex):
    """In-memory pack index implementation.

    Built directly from a list of entries rather than from an index file.

    Constructor args:
        entries: List of (int, bytes) tuples, one per indexed object.
            Presumably (offset, SHA-1) pairs — TODO confirm the tuple
            order against dulwich.
        pack_checksum: Checksum bytes of the pack the entries describe.
    """

    def __init__(self, entries: List[Tuple[int, bytes]], pack_checksum: bytes): ...

class FilePackIndex(PackIndex):
    """File-based pack index with automatic format detection.

    Constructor args:
        filename: Name of the index file.
        file: Optional, defaults to None. Presumably an already-open file
            object for the index — TODO confirm against dulwich.
    """

    def __init__(self, filename: str, file=None): ...

Pack Data Classes

Classes for reading and accessing objects from pack data files.

class PackData:
    """Pack data file reader.

    Gives access to the objects stored in a pack data file, addressed by
    their offset within the file.

    Constructor args:
        filename: Path of the pack data file.
        file: Optional, defaults to None. Presumably an already-open file
            object to read from — TODO confirm against dulwich.
        size: Optional, defaults to None. Presumably the size of the pack
            data — TODO confirm against dulwich.
    """

    def __init__(self, filename: str, file=None, size=None): ...

    def __getitem__(self, offset: int) -> ShaFile:
        """Get object at specified offset.

        Enables subscript access, i.e. ``pack_data[offset]``.

        Args:
            offset: Offset of the object within the pack file.

        Returns:
            The object stored at that offset.
        """

    def get_object_header(self, offset: int) -> Tuple[int, int]:
        """Get object type and size at offset.

        Args:
            offset: Offset of the object within the pack file.

        Returns:
            A (type, size) tuple of ints.
        """

    def get_object_at(self, offset: int) -> ShaFile:
        """Get complete object at offset.

        Args:
            offset: Offset of the object within the pack file.

        Returns:
            The object stored at that offset.
        """

    def iterobjects(self, get_raw=None) -> Iterator[ShaFile]:
        """Iterate over all objects in pack.

        Args:
            get_raw: Optional, defaults to None. Purpose is not described
                in this listing — TODO confirm against dulwich.

        Returns:
            An iterator over the objects in the pack.
        """

    def sorted_entries(self, progress=None) -> List[Tuple[int, bytes, int]]:
        """Get sorted list of (offset, sha, crc32) entries.

        Args:
            progress: Optional, defaults to None. Presumably a progress
                callback — TODO confirm against dulwich.

        Returns:
            A list of (offset, sha, crc32) tuples.
        """

Pack Streaming Classes

Classes for streaming pack data and building indexes.

class PackStreamReader:
    """Read objects from a pack stream.

    Constructor args:
        read_all: Callable that takes a size and returns the bytes read
            from the stream.
        read_some: Optional, defaults to None. Presumably a read function
            that may return fewer bytes than requested — TODO confirm.
        zlib_bufsize: Optional, defaults to None. Presumably the buffer
            size used when decompressing — TODO confirm.
    """

    def __init__(self, read_all, read_some=None, zlib_bufsize=None): ...

    def read_objects(self, compute_crc32=False) -> Iterator[Tuple[int, ShaFile]]:
        """Read objects from stream with optional CRC32 computation.

        Args:
            compute_crc32: Whether to compute CRC32 values while reading.
                Defaults to False.

        Returns:
            An iterator of (offset, object) tuples.
        """

class PackStreamCopier:
    """Copy pack stream while building index.

    Constructor args:
        read_all: Read function for the incoming pack stream.
        read_some: Second read function for the incoming stream.
            Presumably one that may return fewer bytes than requested —
            TODO confirm against dulwich.
        outfile: Presumably the file-like object the stream is copied
            to — TODO confirm against dulwich.
        delta_iter: Optional, defaults to None. Purpose is not described
            in this listing — TODO confirm against dulwich.
    """

    def __init__(self, read_all, read_some, outfile, delta_iter=None): ...

    def verify(self) -> None:
        """Verify pack stream integrity.

        Returns:
            None.
        """

class PackInflater:
    """Inflate pack objects from stream.

    Constructor args:
        read_all: Read function for the incoming pack stream.
    """

    def __init__(self, read_all): ...

class PackIndexer:
    """Build pack index from pack stream.

    Defines ``__enter__`` and ``__exit__``, so instances can be used in a
    ``with`` statement.

    Constructor args:
        f: Presumably the file-like object holding the pack stream —
            TODO confirm against dulwich.
        resolve_ext_ref: Optional, defaults to None. Presumably a callable
            used to resolve references to objects outside the pack —
            TODO confirm against dulwich.
    """

    def __init__(self, f, resolve_ext_ref=None): ...

    # Context-manager protocol: entered and exited by a ``with`` statement.
    def __enter__(self): ...
    def __exit__(self, exc_type, exc_val, exc_tb): ...

Pack Creation Functions

Functions for creating and writing pack files.

def write_pack(filename: str, objects: Iterator[ShaFile],
               deltify=None, delta_window_size=None) -> bytes:
    """
    Write objects to a pack file.

    Args:
        filename: Output pack filename
        objects: Iterator of objects to pack
        deltify: Whether to create deltas (optional, defaults to None)
        delta_window_size: Delta compression window size (optional,
            defaults to None)

    Returns:
        Pack file checksum, as bytes
    """

def write_pack_objects(f, objects: Iterator[ShaFile]) -> bytes:
    """
    Write pack objects to file-like object.

    Unlike write_pack, which takes an output filename, this takes a
    file-like object to write to.

    Args:
        f: File-like object to write to
        objects: Iterator of objects to pack

    Returns:
        Pack file checksum, as bytes
    """

def pack_objects_to_data(objects: Iterator[ShaFile]) -> bytes:
    """
    Pack objects to bytes.

    Takes no filename or file-like object; the packed data is the
    return value.

    Args:
        objects: Iterator of objects to pack

    Returns:
        Pack data as bytes
    """

def generate_unpacked_objects(object_store, object_ids: List[bytes],
                              progress=None) -> Iterator[ShaFile]:
    """
    Generate objects for packing from object store.

    Args:
        object_store: Source object store
        object_ids: List of object IDs (bytes) to pack
        progress: Optional progress callback (defaults to None)

    Yields:
        Objects ready for packing
    """

Pack Loading Functions

Functions for loading and parsing pack files.

def load_pack_index(path: str) -> PackIndex:
    """
    Load pack index from file.

    Args:
        path: Path to .idx file

    Returns:
        Appropriate PackIndex instance. Presumably the concrete class is
        chosen from the index format version found in the file — TODO
        confirm against dulwich.
    """

def read_pack_header(read: Callable[[int], bytes]) -> Tuple[int, int]:
    """
    Read pack file header.

    Args:
        read: Function to read bytes; called with the number of bytes
            wanted and returns them as bytes

    Returns:
        Tuple of (version, num_objects), both ints
    """

Exception Classes

class UnresolvedDeltas(Exception):
    """Exception raised when pack contains unresolved delta objects.

    Derives directly from Exception.
    """

class PackFileDisappeared(Exception):
    """Exception raised when pack file becomes unavailable during operation.

    Derives directly from Exception.
    """

Usage Examples

Reading Pack Files

from dulwich.pack import PackData, load_pack_index

# Load the pack data file and its companion index.
pack_data = PackData('objects/pack/pack-abc123.pack')
pack_index = load_pack_index('objects/pack/pack-abc123.idx')

# Iterate through all objects.
# An object's id is already the hex SHA as ASCII bytes, and type_name is
# bytes too, so decode both for display rather than hex-encoding again.
for obj in pack_data.iterobjects():
    print(f"Object {obj.id.decode('ascii')}: {obj.type_name.decode('ascii')}")

# Get a specific object by SHA.
# bytes.fromhex() accepts only hex digits, so pass the full 40-character
# SHA-1 of the object you want (the value below is a placeholder).
sha = bytes.fromhex('0123456789abcdef0123456789abcdef01234567')
# NOTE(review): `in` and object_index() do not appear in the PackIndex
# listing in this document — confirm they exist in your dulwich version.
if sha in pack_index:
    offset = pack_index.object_offset(pack_index.object_index(sha))
    obj = pack_data[offset]
    print(f"Found object: {obj.type_name.decode('ascii')}")

Creating Pack Files

from dulwich.objects import Blob
from dulwich.pack import write_pack

# Build ten small blobs to put in the pack.
objects = [Blob.from_string(f"Content {i}".encode()) for i in range(10)]

# Write them out as a pack file and report the resulting checksum.
checksum = write_pack('new-pack.pack', iter(objects))
print(f"Pack created with checksum: {checksum.hex()}")

Streaming Pack Data

import gzip

from dulwich.pack import PackStreamReader

# Read a pack from a gzip-compressed stream.
with gzip.open('pack-stream.gz', 'rb') as f:
    def read_all(size):
        """Return up to `size` bytes of decompressed pack data."""
        return f.read(size)

    reader = PackStreamReader(read_all)

    # Process objects as they're read.
    # An object's id is already the hex SHA as ASCII bytes, and type_name
    # is bytes too, so decode both for display rather than hex-encoding again.
    for offset, obj in reader.read_objects():
        print(
            f"Object at {offset}: "
            f"{obj.type_name.decode('ascii')} {obj.id.decode('ascii')}"
        )

Install with Tessl CLI

npx tessl i tessl/pypi-dulwich

docs

cli.md

clients.md

configuration.md

diff-merge.md

index-management.md

index.md

object-storage.md

objects.md

pack-files.md

porcelain.md

references.md

repository.md

tile.json