Pure Python implementation of the Git version control system providing comprehensive access to Git repositories without requiring the Git command-line tool
npx @tessl/cli install tessl/pypi-dulwich@0.24.0

A pure Python implementation of the Git version control system providing comprehensive access to Git repositories without requiring the Git command-line tool to be installed. Dulwich offers both low-level API access to Git objects, references, and data structures, as well as higher-level 'porcelain' functionality for common Git operations like cloning, committing, and pushing.
pip install dulwich

pip install dulwich[fastimport,https,pgp,paramiko] for additional features

Basic repository access:
from dulwich.repo import Repo

High-level Git operations (porcelain API):

from dulwich import porcelain

Git objects:

from dulwich.objects import Blob, Tree, Commit, Tag

Error handling:

from dulwich.errors import NotGitRepository

from dulwich.repo import Repo
from dulwich import porcelain
# Open an existing repository
repo = Repo('.')
# Get the current HEAD commit
head_commit = repo[repo.head()]
print(f"Latest commit: {head_commit.message.decode('utf-8').strip()}")
# High-level operations using porcelain
porcelain.add(repo, 'new_file.txt')
porcelain.commit(repo, 'Added new file', author='John Doe <john@example.com>')
# Clone a repository
porcelain.clone('https://github.com/user/repo.git', 'local-repo')
# Show repository status
status = porcelain.status('.')
print(f"Modified files: {list(status.staged['modify'])}")

Dulwich is designed with multiple abstraction layers:
- dulwich.porcelain
- Repo classes

This layered design provides maximum flexibility, allowing users to choose the appropriate level of abstraction while maintaining compatibility with Git's internal formats and protocols.
Git command equivalents providing familiar functionality for repository management, file operations, branching, merging, and remote synchronization.
def init(path: str = ".", bare: bool = False) -> Repo: ...
def clone(source: str, target: str = None, **kwargs) -> Repo: ...
def add(repo, paths: List[str]) -> None: ...
def commit(repo, message: str, author: str = None, **kwargs) -> bytes: ...
def push(repo, remote_location: str = None, **kwargs) -> Dict: ...
def pull(repo, remote_location: str = None, **kwargs) -> None: ...
def status(repo) -> PorterStatus: ...
def log(repo, max_entries: int = None, **kwargs) -> None: ...

Core repository classes for opening, creating, and managing Git repositories with support for filesystem and in-memory backends.
class Repo:
def __init__(self, root: str): ...
def head(self) -> bytes: ...
def __getitem__(self, key: bytes) -> ShaFile: ...
def close(self) -> None: ...
class MemoryRepo:
def __init__(self): ...

Complete implementation of Git's object model including blobs, trees, commits, and tags with full read/write capabilities and format compliance.
class ShaFile:
@property
def id(self) -> bytes: ...
def as_raw_string(self) -> bytes: ...
class Blob(ShaFile):
def __init__(self, data: bytes = b""): ...
@property
def data(self) -> bytes: ...
class Tree(ShaFile):
def __init__(self): ...
def add(self, name: bytes, mode: int, hexsha: bytes) -> None: ...
def items(self) -> Iterator[TreeEntry]: ...
class Commit(ShaFile):
def __init__(self): ...
@property
def message(self) -> bytes: ...
@property
def author(self) -> bytes: ...
@property
def tree(self) -> bytes: ...

Flexible object storage backends supporting filesystem, memory, pack files, and cloud storage with efficient object retrieval and storage operations.
class BaseObjectStore:
def __contains__(self, sha: bytes) -> bool: ...
def __getitem__(self, sha: bytes) -> ShaFile: ...
def add_object(self, obj: ShaFile) -> None: ...
class DiskObjectStore(BaseObjectStore): ...
class MemoryObjectStore(BaseObjectStore): ...
class PackBasedObjectStore(BaseObjectStore): ...

Git index manipulation for staging changes, managing file states, and preparing commits with support for conflict resolution and extensions.
class Index:
def __init__(self, filename: str): ...
def __getitem__(self, name: bytes) -> IndexEntry: ...
def __setitem__(self, name: bytes, entry: IndexEntry) -> None: ...
def commit(self, object_store: BaseObjectStore) -> bytes: ...
class IndexEntry:
def __init__(self, **kwargs): ...

Complete reference management including branches, tags, symbolic references, and packed refs with validation and atomic updates.
class RefsContainer:
def __getitem__(self, name: bytes) -> bytes: ...
def __setitem__(self, name: bytes, value: bytes) -> None: ...
def keys(self) -> Iterator[bytes]: ...
def check_ref_format(refname: bytes) -> bool: ...
def parse_symref_value(contents: bytes) -> bytes: ...

Network protocol implementations for communicating with Git servers over HTTP, SSH, and Git protocols with authentication and progress tracking.
class GitClient:
def fetch_pack(self, path: str, **kwargs) -> FetchPackResult: ...
def send_pack(self, path: str, **kwargs) -> SendPackResult: ...
class TCPGitClient(GitClient): ...
class SSHGitClient(GitClient): ...
class Urllib3HttpGitClient(GitClient): ...
def get_transport_and_path(uri: str) -> Tuple[GitClient, str]: ...

Efficient pack file handling for Git's compressed object storage format with indexing, streaming, and delta compression support.
class PackData:
def __init__(self, filename: str): ...
def __getitem__(self, offset: int) -> ShaFile: ...
class PackIndex:
def get_pack_checksum(self) -> bytes: ...
def object_sha1(self, index: int) -> bytes: ...
def write_pack_objects(f, objects: Iterator[ShaFile]) -> None: ...

Git configuration file parsing and manipulation supporting repository, user, and system-level configuration with type conversion and validation.
class Config:
def get(self, section: bytes, name: bytes) -> bytes: ...
def set(self, section: bytes, name: bytes, value: bytes) -> None: ...
class ConfigFile(Config):
def __init__(self, filename: str): ...
class StackedConfig(Config):
def __init__(self, backends: List[Config]): ...

Comprehensive diff generation and merge algorithms for comparing trees, detecting changes, and resolving conflicts with rename detection.
def tree_changes(object_store, old_tree: bytes, new_tree: bytes) -> Iterator[TreeChange]: ...
def diff_tree_to_tree(object_store, old_tree: bytes, new_tree: bytes) -> bytes: ...
class RenameDetector:
def __init__(self, object_store): ...
def changes_with_renames(self, changes: List[TreeChange]) -> List[TreeChange]: ...

Complete CLI implementation providing Git command equivalents with argument parsing, progress reporting, and consistent behavior.
def main(argv: List[str] = None) -> int: ...
class Command:
def run(self, args: List[str]) -> int: ...

# Core types used across the API
ObjectID = bytes # 20-byte SHA-1 hash
TreeEntry = NamedTuple[bytes, int, bytes] # name, mode, sha
class PorterStatus:
staged: Dict[str, List[str]]
unstaged: Dict[str, List[str]]
untracked: List[str]
class FetchPackResult:
refs: Dict[bytes, bytes]
symrefs: Dict[bytes, bytes]
class SendPackResult:
ref_status: Dict[bytes, str]