Pure Python implementation of the Git version control system providing comprehensive access to Git repositories without requiring the Git command-line tool
—
Git index manipulation for staging changes, managing file states, and preparing commits. The index serves as Git's staging area between the working directory and repository history.
Main class for Git index operations with file staging and commit preparation.
class Index:
"""
Git index (staging area) for managing file changes.
The index tracks file modifications, additions, and deletions
between the working directory and repository commits.
"""
def __init__(
self,
filename: Union[bytes, str, os.PathLike],
read: bool = True,
skip_hash: bool = False,
version: Optional[int] = None,
):
"""
Initialize index from file.
Parameters:
- filename: Path to index file (.git/index)
- read: Whether to initialize from existing file
- skip_hash: Whether to skip SHA1 hash when writing (for manyfiles feature)
- version: Index format version to use (None = auto-detect)
"""
def __getitem__(self, name: bytes) -> IndexEntry:
"""
Get index entry by path.
Parameters:
- name: File path as bytes
Returns:
IndexEntry for the file
Raises:
KeyError: If file not in index
"""
def __setitem__(self, name: bytes, entry: IndexEntry) -> None:
"""
Set index entry for path.
Parameters:
- name: File path as bytes
- entry: IndexEntry object
"""
def __delitem__(self, name: bytes) -> None:
"""
Remove entry from index.
Parameters:
- name: File path as bytes to remove
"""
def __contains__(self, name: bytes) -> bool:
"""
Check if path is in index.
Parameters:
- name: File path as bytes
Returns:
True if path exists in index
"""
def __iter__(self) -> Iterator[bytes]:
"""
Iterate over index paths.
Yields:
File paths as bytes
"""
def __len__(self) -> int:
"""
Get number of entries in index.
Returns:
Number of index entries
"""
def clear(self) -> None:
"""Remove all entries from index."""
def get_sha1(self, path: bytes) -> bytes:
"""
Return the SHA-1 for the object at a path.
Parameters:
- path: File path as bytes
Returns:
SHA-1 hash of the object
Raises:
UnmergedEntries: If path has merge conflicts
"""
def get_mode(self, path: bytes) -> int:
"""
Return the POSIX file mode for the object at a path.
Parameters:
- path: File path as bytes
Returns:
File mode as integer
Raises:
UnmergedEntries: If path has merge conflicts
"""
def iterobjects(self) -> Iterable[tuple[bytes, bytes, int]]:
"""
Iterate over path, sha, mode tuples for use with commit_tree.
Yields:
Tuples of (path, sha, mode)
Raises:
UnmergedEntries: If index contains unmerged entries
"""
def has_conflicts(self) -> bool:
"""
Check if index contains merge conflicts.
Returns:
True if conflicts exist
"""
def changes_from_tree(
self,
object_store: ObjectContainer,
tree: ObjectID,
want_unchanged: bool = False,
) -> Generator[
tuple[
tuple[Optional[bytes], Optional[bytes]],
tuple[Optional[int], Optional[int]],
tuple[Optional[bytes], Optional[bytes]],
],
None,
None,
]:
"""
Find differences between index and tree.
Parameters:
- object_store: Object store for retrieving tree contents
- tree: SHA-1 of the root tree
- want_unchanged: Whether unchanged files should be reported
Yields:
Tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
"""
def commit(self, object_store: ObjectContainer) -> bytes:
"""
Create tree object from index contents.
Parameters:
- object_store: Object store for writing tree objects
Returns:
SHA-1 hash of created tree
"""
def write(self) -> None:
"""Write index to disk."""
def read(self) -> None:
"""Read index from disk."""
@property
def path(self) -> str:
"""
Index file path.
Returns:
Path to index file
"""
@property
def version(self) -> int:
"""
Index format version.
Returns:
Index format version number
"""
Classes representing individual entries in the Git index.
class IndexEntry:
"""
Single entry in Git index representing staged file.
Contains file metadata including timestamps, permissions,
size, and SHA-1 hash of content.
"""
def __init__(
self,
ctime: Tuple[int, int],
mtime: Tuple[int, int],
dev: int,
ino: int,
mode: int,
uid: int,
gid: int,
size: int,
sha: bytes,
flags: int = 0
):
"""
Create index entry.
Parameters:
- ctime: Inode change time (last metadata change) as (seconds, nanoseconds)
- mtime: Modification time as (seconds, nanoseconds)
- dev: Device ID
- ino: Inode number
- mode: File mode/permissions
- uid: User ID
- gid: Group ID
- size: File size in bytes
- sha: 20-byte SHA-1 hash of content
- flags: Index entry flags
"""
@property
def ctime(self) -> Tuple[int, int]:
"""
Inode change time (last metadata change).
Returns:
Tuple of (seconds, nanoseconds)
"""
@property
def mtime(self) -> Tuple[int, int]:
"""
Modification time.
Returns:
Tuple of (seconds, nanoseconds)
"""
@property
def mode(self) -> int:
"""
File mode/permissions.
Returns:
Unix file mode
"""
@property
def sha(self) -> bytes:
"""
Content SHA-1 hash.
Returns:
20-byte SHA-1 hash
"""
@property
def size(self) -> int:
"""
File size.
Returns:
File size in bytes
"""
@property
def flags(self) -> int:
"""
Index entry flags.
Returns:
Flags indicating entry state
"""
def stage(self) -> Stage:
"""
Get the merge stage of this entry.
Returns:
Stage enum value
"""
@property
def skip_worktree(self) -> bool:
"""
Return True if the skip-worktree bit is set.
Returns:
True if skip-worktree flag is set
"""
def set_skip_worktree(self, skip: bool = True) -> None:
"""
Set or clear the skip-worktree bit.
Parameters:
- skip: Whether to set the skip-worktree bit
"""
@classmethod
def from_serialized(cls, serialized: SerializedIndexEntry) -> "IndexEntry":
"""
Create IndexEntry from serialized data.
Parameters:
- serialized: SerializedIndexEntry object
Returns:
IndexEntry object
"""
def serialize(self, name: bytes, stage: Stage) -> SerializedIndexEntry:
"""
Serialize entry for writing to index file.
Parameters:
- name: File path as bytes
- stage: Merge stage
Returns:
SerializedIndexEntry object
"""
@dataclass
class SerializedIndexEntry:
"""
Serialized representation of an index entry.
Used during index file I/O operations to handle
raw data before conversion to IndexEntry objects.
"""
name: bytes
ctime: Union[int, float, tuple[int, int]]
mtime: Union[int, float, tuple[int, int]]
dev: int
ino: int
mode: int
uid: int
gid: int
size: int
sha: bytes
flags: int
extended_flags: int
def stage(self) -> Stage:
"""
Get the merge stage of this entry.
Returns:
Stage enum value
"""
class Stage(Enum):
"""
Merge conflict stage numbers.
Used to identify different versions of files
during merge operations.
"""
NORMAL = 0
MERGE_CONFLICT_ANCESTOR = 1
MERGE_CONFLICT_THIS = 2
MERGE_CONFLICT_OTHER = 3
class ConflictedIndexEntry:
"""
Index entry representing merge conflict.
Contains multiple versions of the same file from
different merge parents requiring resolution.
"""
ancestor: Optional[IndexEntry]
this: Optional[IndexEntry]
other: Optional[IndexEntry]
def __init__(
self,
ancestor: Optional[IndexEntry] = None,
this: Optional[IndexEntry] = None,
other: Optional[IndexEntry] = None,
):
"""
Create conflicted index entry.
Parameters:
- ancestor: Common ancestor version (stage 1)
- this: Current branch version (stage 2)
- other: Other branch version (stage 3)
"""
Classes for handling Git index extensions.
@dataclass
class IndexExtension:
"""
Base class for index extensions.
Extensions provide additional metadata stored
in the Git index file format.
"""
signature: bytes
data: bytes
@classmethod
def from_raw(cls, signature: bytes, data: bytes) -> "IndexExtension":
"""
Create extension from raw data.
Parameters:
- signature: 4-byte extension signature
- data: Extension data
Returns:
Parsed extension object
"""
def to_bytes(self) -> bytes:
"""
Serialize extension to bytes.
Returns:
Extension data as bytes
"""
class TreeExtension(IndexExtension):
"""
Tree cache extension for faster tree object creation.
Caches tree SHA-1 values to avoid recalculation
when creating tree objects from index.
"""
def __init__(self, entries: list[tuple[bytes, bytes, int]]) -> None:
"""
Initialize tree extension.
Parameters:
- entries: List of (path, sha, entry_count) tuples
"""
class ResolveUndoExtension(IndexExtension):
"""
Resolve undo extension for recording merge conflicts.
Stores information about resolved conflicts to allow
undoing merge conflict resolution.
"""
def __init__(self, entries: list[tuple[bytes, list[tuple[int, bytes]]]]) -> None:
"""
Initialize resolve undo extension.
Parameters:
- entries: List of (path, [(stage, sha), ...]) tuples
"""
class UntrackedExtension(IndexExtension):
"""
Untracked cache extension for faster status operations.
Caches information about untracked files to speed up
git status operations.
"""
def __init__(self, data: bytes) -> None:
"""
Initialize untracked extension.
Parameters:
- data: Raw extension data
"""
Standalone functions for index operations and manipulation.
def read_index(filename: str) -> Index:
"""
Read index from file.
Parameters:
- filename: Path to index file
Returns:
Index object loaded from file
"""
def write_index(filename: str, entries: Dict[bytes, IndexEntry]) -> None:
"""
Write index entries to file.
Parameters:
- filename: Path to index file
- entries: Dict mapping paths to IndexEntry objects
"""
def read_index_dict(filename: str) -> Dict[bytes, IndexEntry]:
"""
Read index as dictionary.
Parameters:
- filename: Path to index file
Returns:
Dict mapping file paths to IndexEntry objects
"""
def write_index_dict(
filename: str,
entries: Dict[bytes, IndexEntry],
version: int = 2
) -> None:
"""
Write index dictionary to file.
Parameters:
- filename: Path to index file
- entries: Dict mapping paths to IndexEntry objects
- version: Index format version
"""
def commit_tree(
object_store: BaseObjectStore,
index: Dict[bytes, IndexEntry]
) -> bytes:
"""
Create tree object from index entries.
Parameters:
- object_store: Object store for writing tree objects
- index: Dict mapping paths to IndexEntry objects
Returns:
SHA-1 hash of created tree
"""
def commit_index(
object_store: BaseObjectStore,
index: Index
) -> bytes:
"""
Create tree from Index object.
Parameters:
- object_store: Object store for writing objects
- index: Index object
Returns:
SHA-1 hash of created tree
"""
def build_index_from_tree(
root_path: Union[str, bytes],
index_path: Union[str, bytes],
object_store: BaseObjectStore,
tree_id: bytes,
honor_filemode: bool = True
) -> None:
"""
Check out a tree into a working directory and write the matching index file.
Parameters:
- root_path: Target directory in which to create the working tree files
- index_path: Path of the index file to write
- object_store: Object store containing tree objects
- tree_id: Tree SHA-1 hash
- honor_filemode: Respect file mode from tree
"""
def changes_from_tree(
object_store: BaseObjectStore,
index: Dict[bytes, IndexEntry],
tree_id: bytes,
want_unchanged: bool = False
) -> Iterator[Tuple[bytes, int, bytes]]:
"""
Get changes between index and tree.
Parameters:
- object_store: Object store containing objects
- index: Index entries to compare
- tree_id: Tree SHA-1 hash to compare against
- want_unchanged: Include unchanged files
Yields:
Tuples of (path, change_type, sha1_hash)
"""
def blob_from_path_and_stat(
fs_path: bytes,
st: os.stat_result,
tree_encoding: str = "utf-8"
) -> Blob:
"""
Create blob from filesystem path and stat.
Parameters:
- fs_path: Filesystem path to file
- st: File stat information
- tree_encoding: Encoding for tree paths
Returns:
Blob object
"""
def blob_from_path_and_mode(
fs_path: bytes,
mode: int,
tree_encoding: str = "utf-8"
) -> Blob:
"""
Create blob from filesystem path and mode.
Parameters:
- fs_path: Filesystem path to file
- mode: File mode
- tree_encoding: Encoding for tree paths
Returns:
Blob object
"""
def index_entry_from_stat(
stat_val: os.stat_result,
hex_sha: bytes,
mode: Optional[int] = None,
) -> IndexEntry:
"""
Create index entry from stat result.
Parameters:
- stat_val: POSIX stat_result
- hex_sha: Hex SHA of the object
- mode: Optional file mode override
Returns:
IndexEntry object
"""
def index_entry_from_path(
path: bytes,
object_store: Optional[ObjectContainer] = None
) -> Optional[IndexEntry]:
"""
Create index entry from filesystem path.
Parameters:
- path: Path to create entry for
- object_store: Optional object store for new blobs
Returns:
IndexEntry object or None for directories
"""
def update_working_tree(
repo: "Repo",
old_tree_id: Optional[bytes],
new_tree_id: bytes,
change_iterator: Iterator["TreeChange"],
honor_filemode: bool = True,
validate_path_element: Optional[Callable[[bytes], bool]] = None,
symlink_fn: Optional[Callable] = None,
force_remove_untracked: bool = False,
blob_normalizer: Optional["BlobNormalizer"] = None,
tree_encoding: str = "utf-8",
allow_overwrite_modified: bool = False,
) -> None:
"""
Update working tree and index to match a new tree.
Parameters:
- repo: Repository object
- old_tree_id: SHA of tree before update
- new_tree_id: SHA of tree to update to
- change_iterator: Iterator of TreeChange objects
- honor_filemode: Honor core.filemode setting
- validate_path_element: Function to validate paths
- symlink_fn: Function for creating symlinks
- force_remove_untracked: Remove untracked files
- blob_normalizer: Line ending normalizer
- tree_encoding: Encoding for tree paths
- allow_overwrite_modified: Allow overwriting modified files
"""
def get_unstaged_changes(
index: Index,
root_path: Union[str, bytes],
filter_blob_callback: Optional[Callable] = None,
) -> Generator[bytes, None, None]:
"""
Find paths with unstaged changes.
Parameters:
- index: Index to check
- root_path: Root path to find files
- filter_blob_callback: Optional blob filter
Yields:
Paths with unstaged changes
"""
def refresh_index(index: Index, root_path: bytes) -> None:
"""
Refresh index contents from filesystem.
Parameters:
- index: Index to update
- root_path: Root filesystem path
"""
Context manager for safely modifying the index.
class locked_index:
"""
Context manager for locking index during modifications.
Ensures atomic writes to the index file and proper
cleanup if operations fail.
"""
def __init__(self, path: Union[bytes, str]) -> None:
"""
Initialize locked index.
Parameters:
- path: Path to index file
"""
def __enter__(self) -> Index:
"""
Enter context and return locked index.
Returns:
Index object ready for modification
"""
def __exit__(
self,
exc_type: Optional[type],
exc_value: Optional[BaseException],
traceback: Optional[types.TracebackType],
) -> None:
"""
Exit context and write/abort changes.
Parameters:
- exc_type: Exception type if any
- exc_value: Exception value if any
- traceback: Exception traceback if any
"""
Index-specific exceptions for error handling.
class UnsupportedIndexFormat(Exception):
"""
Raised when index format version is not supported.
Occurs when trying to read index with newer format
version than supported by current dulwich version.
"""
def __init__(self, version: int) -> None:
"""
Initialize exception.
Parameters:
- version: Unsupported index format version
"""
class UnmergedEntries(Exception):
"""
Raised when index contains unmerged entries.
Occurs during operations that require clean index
but merge conflicts exist.
"""
Usage examples:
from dulwich.index import (
Index, IndexEntry, ConflictedIndexEntry, locked_index,
build_index_from_tree, update_working_tree
)
from dulwich.objects import Blob
from dulwich.repo import Repo
import os
# Open repository index
index = Index("/path/to/repo/.git/index")
# Check if file is staged
if b"file.txt" in index:
entry = index[b"file.txt"]
print(f"File SHA-1: {entry.sha.hex()}")
print(f"File size: {entry.size}")
print(f"Stage: {entry.stage()}")
# Stage new file using locked context
with locked_index("/path/to/repo/.git/index") as index:
stat_result = os.stat("/path/to/file.txt")
entry = IndexEntry(
ctime=(int(stat_result.st_ctime), 0),
mtime=(int(stat_result.st_mtime), 0),
dev=stat_result.st_dev,
ino=stat_result.st_ino,
mode=stat_result.st_mode,
uid=stat_result.st_uid,
gid=stat_result.st_gid,
size=stat_result.st_size,
sha=b"new_blob_sha_here",
flags=0
)
index[b"file.txt"] = entry
# Index automatically written on context exit
# Build index from tree
repo = Repo("/path/to/repo")
build_index_from_tree(
"/path/to/worktree",
"/path/to/repo/.git/index",
repo.object_store,
b"tree_sha_here"
)
# Create tree from index
tree_sha = index.commit(repo.object_store)
print(f"Tree SHA-1: {tree_sha.hex()}")
# Check for conflicts
if index.has_conflicts():
print("Index has merge conflicts")
for path in index:
entry = index[path]
if isinstance(entry, ConflictedIndexEntry):
print(f"Conflict in {path.decode()}:")
if entry.ancestor:
print(f" Ancestor: {entry.ancestor.sha.hex()}")
if entry.this:
print(f" This: {entry.this.sha.hex()}")
if entry.other:
print(f" Other: {entry.other.sha.hex()}")
Install with Tessl CLI:
npx tessl i tessl/pypi-dulwich