Pure Python implementation of the Git version control system, providing comprehensive access to Git repositories without requiring the Git command-line tool.
—
Core repository classes for opening, creating, and managing Git repositories. These classes provide direct access to repository internals including objects, references, and configuration.
Main repository implementations supporting different storage backends and access patterns.
class BaseRepo:
"""
Abstract base class for Git repositories.
Provides common interface for all repository implementations.
"""
def __init__(self, object_store: PackBasedObjectStore, refs: RefsContainer) -> None:
"""
Open a repository.
This shouldn't be called directly, but rather through one of the
subclasses, such as MemoryRepo or Repo.
Parameters:
- object_store: Object store to use
- refs: Refs container to use
"""
def head(self) -> bytes:
"""
Get the SHA-1 of the current HEAD.
Returns:
20-byte SHA-1 hash of HEAD commit
"""
def get_refs(self) -> dict[bytes, bytes]:
"""
Get dictionary with all refs.
Returns:
A dict mapping ref names to SHA1s
"""
def get_config(self) -> Config:
"""
Get repository configuration.
Returns:
Config object for repository settings
"""
def close(self) -> None:
"""Close repository and release resources."""
def __enter__(self):
"""Context manager entry."""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit."""
self.close()
def __getitem__(self, key: bytes) -> ShaFile:
"""
Retrieve object by SHA-1.
Parameters:
- key: 20-byte SHA-1 hash
Returns:
ShaFile object (Blob, Tree, Commit, or Tag)
"""
def __contains__(self, key: bytes) -> bool:
"""
Check if object exists in repository.
Parameters:
- key: 20-byte SHA-1 hash
Returns:
True if object exists
"""
def fetch(
self,
target,
determine_wants=None,
progress=None,
depth: Optional[int] = None
):
"""
Fetch objects into another repository.
Parameters:
- target: The target repository
- determine_wants: Optional function to determine what refs to fetch
- progress: Optional progress function
- depth: Optional shallow fetch depth
Returns:
The local refs
"""
def fetch_pack_data(
self,
determine_wants,
graph_walker,
progress,
*,
get_tagged=None,
depth: Optional[int] = None
):
"""
Fetch the pack data required for a set of revisions.
Parameters:
- determine_wants: Function that takes a dictionary with heads and returns the list of heads to fetch
- graph_walker: Object that can iterate over the list of revisions to fetch
- progress: Simple progress function that will be called with updated progress strings
- get_tagged: Function that returns a dict of pointed-to sha -> tag sha for including tags
- depth: Shallow fetch depth
Returns:
count and iterator over pack data
"""
def find_missing_objects(
self,
determine_wants,
graph_walker,
progress,
*,
get_tagged=None,
depth: Optional[int] = None
) -> Optional[MissingObjectFinder]:
"""
Fetch the missing objects required for a set of revisions.
Parameters:
- determine_wants: Function that takes a dictionary with heads and returns the list of heads to fetch
- graph_walker: Object that can iterate over the list of revisions to fetch
- progress: Simple progress function that will be called with updated progress strings
- get_tagged: Function that returns a dict of pointed-to sha -> tag sha for including tags
- depth: Shallow fetch depth
Returns:
iterator over objects, with __len__ implemented. The return annotation is
Optional, so callers should be prepared for None
(NOTE(review): the conditions under which None is returned are not shown
here - confirm against the implementation)
"""
def generate_pack_data(
self,
have: list[ObjectID],
want: list[ObjectID],
progress: Optional[Callable[[str], None]] = None,
ofs_delta: Optional[bool] = None
):
"""
Generate pack data objects for a set of wants/haves.
Parameters (listed in signature order):
- have: List of SHA1s of objects that should not be sent
- want: List of SHA1s of objects that should be sent
- progress: Optional progress reporting method
- ofs_delta: Whether OFS deltas can be included
"""
def get_graph_walker(
self,
heads: Optional[list[ObjectID]] = None
) -> ObjectStoreGraphWalker:
"""
Retrieve a graph walker.
A graph walker is used by a remote repository (or proxy)
to find out which objects are present in this repository.
Parameters:
- heads: Repository heads to use (optional)
Returns:
A graph walker object
"""
def get_parents(self, sha: bytes, commit: Optional[Commit] = None) -> list[bytes]:
"""
Retrieve the parents of a specific commit.
If the specific commit is a graftpoint, the graft parents
will be returned instead.
Parameters:
- sha: SHA of the commit for which to retrieve the parents
- commit: Optional commit matching the sha
Returns:
List of parents
"""
def parents_provider(self) -> ParentsProvider:
"""
Get parents provider for this repository.
Returns:
ParentsProvider object for resolving commit parents
"""
def get_worktree(self) -> "WorkTree":
"""
Get the working tree for this repository.
Returns:
WorkTree instance for performing working tree operations
Raises:
NotImplementedError: If the repository doesn't support working trees
"""
def get_rebase_state_manager(self):
"""
Get the appropriate rebase state manager for this repository.
Returns:
RebaseStateManager instance
"""
def get_blob_normalizer(self):
"""
Return a BlobNormalizer object for checkin/checkout operations.
Returns:
BlobNormalizer instance
"""
def get_gitattributes(self, tree: Optional[bytes] = None) -> "GitAttributes":
"""
Read gitattributes for the repository.
Parameters:
- tree: Tree SHA to read .gitattributes from (defaults to HEAD)
Returns:
GitAttributes object that can be used to match paths
"""
def get_shallow(self) -> set[ObjectID]:
"""
Get the set of shallow commits.
Returns:
Set of shallow commits
"""
def update_shallow(self, new_shallow, new_unshallow) -> None:
"""
Update the list of shallow objects.
Parameters:
- new_shallow: Newly shallow objects
- new_unshallow: Newly no longer shallow objects
"""
def get_peeled(self, ref: Ref) -> ObjectID:
"""
Get the peeled value of a ref.
Parameters:
- ref: The refname to peel
Returns:
The fully-peeled SHA1 of a tag object, after peeling all
intermediate tags; if the original ref does not point to a tag,
this will equal the original SHA1
"""
@property
def notes(self) -> "Notes":
"""
Access notes functionality for this repository.
Returns:
Notes object for accessing notes
"""
def get_walker(self, include: Optional[list[bytes]] = None, **kwargs):
"""
Obtain a walker for this repository.
Parameters:
- include: Iterable of SHAs of commits to include along with their ancestors. Defaults to [HEAD]
Keyword Args:
- exclude: Iterable of SHAs of commits to exclude along with their ancestors, overriding includes
- order: ORDER_* constant specifying the order of results. Anything other than ORDER_DATE may result in O(n) memory usage
- reverse: If True, reverse the order of output, requiring O(n) memory
- max_entries: The maximum number of entries to yield, or None for no limit
- paths: Iterable of file or subtree paths to show entries for
- rename_detector: diff.RenameDetector object for detecting renames
- follow: If True, follow path across renames/copies. Forces a default rename_detector
- since: Timestamp to list commits after
- until: Timestamp to list commits before
- queue_cls: A class to use for a queue of commits, supporting the iterator protocol
Returns:
A Walker object
"""
class Repo(BaseRepo):
"""
Git repository on local filesystem.
Provides access to .git directory structure including objects,
refs, config, index, and working tree.
"""
def __init__(self, root: Union[str, bytes, os.PathLike] = ".", object_store: Optional[PackBasedObjectStore] = None, bare: Optional[bool] = None):
"""
Initialize repository object.
Parameters:
- root: Path to repository root (containing .git)
- object_store: ObjectStore to use; if omitted, we use the repository's default object store
- bare: True if this is a bare repository
"""
@property
def path(self) -> str:
"""
Repository root path.
Returns:
Absolute path to repository root
"""
@property
def controldir(self) -> str:
"""
Git control directory path.
Returns:
Path to .git directory
"""
@property
def commondir(self) -> str:
"""
Common Git directory for worktrees.
Returns:
Path to common .git directory
"""
def open_index(self) -> Index:
"""
Open repository index file.
Returns:
Index object for staging area
"""
def stage(self, fs_paths: list[str]) -> None:
"""
Stage files for commit.
Parameters:
- fs_paths: List of filesystem paths to stage
"""
def unstage(self, fs_paths: list[str]) -> None:
"""
Unstage files from index.
Parameters:
- fs_paths: List of filesystem paths to unstage
"""
def reset_index(self, tree_id: Optional[bytes] = None) -> None:
"""
Reset index to match tree.
Parameters:
- tree_id: Tree SHA-1; when omitted (None), the HEAD tree is used
"""
@property
def worktrees(self) -> "WorkTreeContainer":
"""
Access worktrees container for this repository.
Returns:
WorkTreeContainer for managing linked worktrees
"""
@classmethod
def discover(cls, start="."):
"""
Iterate parent directories to discover a repository.
Return a Repo object for the first parent directory that looks like a
Git repository.
Parameters:
- start: The directory to start discovery from (defaults to '.')
"""
def clone(
self,
target_path,
*,
mkdir=True,
bare=False,
origin=b"origin",
checkout=None,
branch=None,
progress=None,
depth: Optional[int] = None,
symlinks=None,
) -> "Repo":
"""
Clone this repository.
Parameters:
- target_path: Target path
- mkdir: Create the target directory
- bare: Whether to create a bare repository
- checkout: Whether or not to check-out HEAD after cloning
- origin: Base name for refs in target repository cloned from this repository
- branch: Optional branch or tag to be used as HEAD in the new repository instead of this repository's HEAD
- progress: Optional progress function
- depth: Depth at which to fetch
- symlinks: Symlinks setting (default to autodetect)
Returns:
Created repository as Repo
"""
def read_reflog(self, ref):
"""
Read reflog entries for a reference.
Parameters:
- ref: Reference name (e.g. b'HEAD', b'refs/heads/master')
Yields:
reflog.Entry objects in chronological order (oldest first)
"""
@classmethod
def _init_new_working_directory(
cls,
path: Union[str, bytes, os.PathLike],
main_repo,
identifier=None,
mkdir=False,
):
"""
Create a new working directory linked to a repository.
Parameters:
- path: Path in which to create the working tree
- main_repo: Main repository to reference
- identifier: Worktree identifier
- mkdir: Whether to create the directory
Returns:
Repo instance
"""
def get_blob_normalizer(self):
"""
Return a BlobNormalizer object for checkin/checkout operations.
Returns:
BlobNormalizer instance
"""
def get_gitattributes(self, tree: Optional[bytes] = None) -> "GitAttributes":
"""
Read gitattributes for the repository.
Parameters:
- tree: Tree SHA to read .gitattributes from (defaults to HEAD)
Returns:
GitAttributes object that can be used to match paths
"""
class MemoryRepo(BaseRepo):
"""
Git repository stored entirely in memory.
Useful for testing and temporary operations without
filesystem storage.
"""
def __init__(self):
"""Initialize empty in-memory repository."""
def set_description(self, description: bytes) -> None:
"""
Set repository description.
Parameters:
- description: Repository description text
"""
def get_description(self) -> bytes:
"""
Get repository description.
Returns:
Repository description text
"""
def do_commit(
self,
message: Optional[bytes] = None,
committer: Optional[bytes] = None,
author: Optional[bytes] = None,
commit_timestamp=None,
commit_timezone=None,
author_timestamp=None,
author_timezone=None,
tree: Optional[ObjectID] = None,
encoding: Optional[bytes] = None,
ref: Optional[Ref] = b"HEAD",
merge_heads: Optional[list[ObjectID]] = None,
no_verify: bool = False,
sign: bool = False,
):
"""
Create a new commit.
This is a simplified implementation for in-memory repositories that
doesn't support worktree operations or hooks.
Parameters:
- message: Commit message
- committer: Committer fullname
- author: Author fullname
- commit_timestamp: Commit timestamp (defaults to now)
- commit_timezone: Commit timestamp timezone (defaults to GMT)
- author_timestamp: Author timestamp (defaults to commit timestamp)
- author_timezone: Author timestamp timezone (defaults to commit timezone)
- tree: SHA1 of the tree root to use
- encoding: Encoding
- ref: Optional ref to commit to (defaults to current branch). If None, creates a dangling commit without updating any ref
- merge_heads: Merge heads
- no_verify: Skip pre-commit and commit-msg hooks (ignored for MemoryRepo)
- sign: GPG Sign the commit (ignored for MemoryRepo)
Returns:
New commit SHA1
"""
@classmethod
def init_bare(cls, objects, refs, format: Optional[int] = None):
"""
Create a new bare repository in memory.
Parameters:
- objects: Objects for the new repository, as iterable
- refs: Refs as dictionary, mapping names to object SHA1s
- format: Repository format version (defaults to 0)
"""
class BareRepo(Repo):
"""
Bare Git repository without working tree.
Contains only Git objects and metadata without
checked out files.
"""
def __init__(self, path: str):
"""
Initialize bare repository.
Parameters:
- path: Path to bare repository directory
"""

Helper class for resolving commit parents and ancestry relationships.
class ParentsProvider:
"""
Provides parent commit information for merge operations.
Used by merge algorithms to traverse commit ancestry
and determine merge bases.
"""
def __init__(self, store, grafts={}, shallows=[]):
"""
Initialize parents provider.
Parameters:
- store: Object store containing commits
- grafts: Dictionary of commit grafts (defaults to an empty dict)
- shallows: List of shallow commit SHAs (defaults to an empty list)
Note:
The default values for grafts and shallows are mutable objects that are
created once and shared by every call that omits them, so they must be
treated as read-only. Pass your own dict/list if the provider's graft or
shallow data will be modified.
"""
def get_parents(self, commit_id, commit=None):
"""
Get parent commit IDs.
Parameters:
- commit_id: Commit SHA-1 hash
- commit: Optional commit object
Returns:
List of parent commit SHA-1 hashes
"""

Repository-specific exception classes for error handling.
class InvalidUserIdentity(Exception):
"""
Raised when user identity is invalid for commit operations.
User identity must be in format "Name <email>".
"""
class DefaultIdentityNotFound(Exception):
"""
Raised when no default user identity found in configuration.
Occurs when attempting commit without author/committer
and no default configured.
"""
class UnsupportedVersion(Exception):
"""
Raised when repository version is not supported.
Occurs when opening repository with newer format version
than supported by current dulwich version.
"""
class UnsupportedExtension(Exception):
"""
Raised when repository uses unsupported extensions.
Git repositories can have extensions that modify behavior.
This is raised for unknown or unsupported extensions.
"""

from dulwich.repo import Repo, init_repo
from dulwich.errors import NotGitRepository
# Initialize new repository
repo = init_repo("/path/to/new/repo")
# Open existing repository
# The repository is opened *before* the try block: if opening fails there is
# nothing to close, and the `finally` clause must not close whatever object
# the name `repo` happened to be bound to earlier.
repo = Repo("/path/to/existing/repo")
try:
    print(f"Repository at: {repo.path}")
    print(f"Current HEAD: {repo.head().hex()}")
finally:
    # Always release the repository's resources, even if printing fails.
    repo.close()
# Using context manager (recommended)
with Repo("/path/to/repo") as repo:
head_commit = repo[repo.head()]
print(f"Latest commit: {head_commit.message.decode('utf-8')}")

with Repo("/path/to/repo") as repo:
# Get HEAD commit
head_sha = repo.head()
head_commit = repo[head_sha]
# Get commit tree
tree = repo[head_commit.tree]
# List tree contents
for entry in tree.items():
print(f"{entry.path.decode('utf-8')}: {entry.sha.hex()}")
# Check if object exists
if head_sha in repo:
print("HEAD commit exists in repository")

from dulwich.repo import find_repo_root, check_repo_exists
# Find repository root from current directory
try:
repo_root = find_repo_root()
print(f"Repository root: {repo_root}")
except NotGitRepository:
print("Not in a Git repository")
# Check if directory contains repository
if check_repo_exists("/some/path"):
print("Valid Git repository found")

Install with Tessl CLI
npx tessl i tessl/pypi-dulwich