Pure Python implementation of the Git version control system providing comprehensive access to Git repositories without requiring the Git command-line tool
—
Comprehensive diff generation and merge algorithms for comparing trees, detecting changes, and resolving conflicts with rename detection.
Functions for comparing Git trees and detecting changes between different states.
def tree_changes(object_store, old_tree: bytes, new_tree: bytes,
want_unchanged: bool = False, rename_detector=None,
change_type_same: bool = False) -> Iterator[TreeChange]:
"""
Find changes between two trees.
Args:
object_store: Object store for loading trees
old_tree: SHA of old tree (None for empty)
new_tree: SHA of new tree (None for empty)
want_unchanged: Include unchanged files
rename_detector: RenameDetector instance
change_type_same: If True, report a file whose type changed in a single change entry rather than as a separate delete and add
Yields:
TreeChange objects describing changes
"""
def tree_changes_for_merge(object_store, tree1: bytes, tree2: bytes,
base_tree: bytes) -> Iterator[TreeChange]:
"""
Find changes for three-way merge.
Args:
object_store: Object store for loading trees
tree1: SHA of first tree
tree2: SHA of second tree
base_tree: SHA of common base tree
Yields:
TreeChange objects for merge conflicts
"""
def walk_trees(object_store, tree1_id: bytes, tree2_id: bytes,
prune_identical: bool = True) -> Iterator[Tuple]:
"""
Walk two trees simultaneously.
Args:
object_store: Object store for loading trees
tree1_id: SHA of first tree
tree2_id: SHA of second tree
prune_identical: Skip identical subtrees
Yields:
Tuples of (path, mode1, sha1, mode2, sha2)
"""
Functions for generating unified diff output from tree and working directory changes.
def diff_tree_to_tree(object_store, old_tree: bytes, new_tree: bytes,
outf=None) -> bytes:
"""
Generate unified diff between two trees.
Args:
object_store: Object store for loading objects
old_tree: SHA of old tree
new_tree: SHA of new tree
outf: Optional output file
Returns:
Diff as bytes if outf is None
"""
def diff_index_to_tree(object_store, index_path: str, tree: bytes,
outf=None) -> bytes:
"""
Generate diff between index and tree.
Args:
object_store: Object store for loading objects
index_path: Path to index file
tree: SHA of tree to compare against
outf: Optional output file
Returns:
Diff as bytes if outf is None
"""
def diff_working_tree_to_tree(object_store, tree: bytes, root_path: str = ".",
outf=None) -> bytes:
"""
Generate diff between working tree and Git tree.
Args:
object_store: Object store for loading objects
tree: SHA of tree to compare against
root_path: Root path of working tree
outf: Optional output file
Returns:
Diff as bytes if outf is None
"""
def diff_working_tree_to_index(object_store, index_path: str,
root_path: str = ".", outf=None) -> bytes:
"""
Generate diff between working tree and index.
Args:
object_store: Object store for loading objects
index_path: Path to index file
root_path: Root path of working tree
outf: Optional output file
Returns:
Diff as bytes if outf is None
"""
Classes and functions for detecting file renames and copies in diffs.
class RenameDetector:
"""Detect renames and copies in tree changes."""
def __init__(self, object_store, rename_threshold: int = 60,
copy_threshold: int = 60, max_files: int = None,
big_file_threshold: int = None): ...
def changes_with_renames(self, changes: List[TreeChange]) -> List[TreeChange]:
"""
Process changes to detect renames and copies.
Args:
changes: List of TreeChange objects
Returns:
List of TreeChange objects with renames detected
"""
def find_renames(self, added_files: List[TreeChange],
deleted_files: List[TreeChange]) -> List[Tuple[TreeChange, TreeChange]]:
"""
Find renames between added and deleted files.
Args:
added_files: List of added files
deleted_files: List of deleted files
Returns:
List of (deleted, added) pairs that are renames
"""
Classes representing different types of changes between trees.
class TreeChange:
"""Represents a change between two trees."""
type: str # 'add', 'delete', 'modify', 'rename', 'copy', 'unchanged'
old: TreeEntry # (path, mode, sha) or None
new: TreeEntry # (path, mode, sha) or None
def __init__(self, type: str, old: TreeEntry = None, new: TreeEntry = None): ...
@property
def is_rename(self) -> bool:
"""True if this change represents a rename."""
@property
def is_copy(self) -> bool:
"""True if this change represents a copy."""
@property
def is_modify(self) -> bool:
"""True if this change represents a modification."""
Classes for generating colorized diff output.
class ColorizedDiffStream:
"""Stream wrapper that adds ANSI color codes to diff output."""
def __init__(self, f, color_config=None): ...
def write(self, data: bytes) -> None:
"""Write colorized diff data to underlying stream."""
from dulwich.repo import Repo
from dulwich.diff_tree import tree_changes
# Open repository
repo = Repo('.')
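# Get two commits to compare (note: Repo.__getitem__ accepts a SHA or ref name; if b'HEAD~1' raises KeyError, use repo[repo[b'HEAD'].parents[0]] instead)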
commit1 = repo[b'HEAD~1']
commit2 = repo[b'HEAD']
# Find changes between the commits
changes = tree_changes(repo.object_store, commit1.tree, commit2.tree)
for change in changes:
if change.type == 'add':
print(f"Added: {change.new.path}")
elif change.type == 'delete':
print(f"Deleted: {change.old.path}")
elif change.type == 'modify':
print(f"Modified: {change.new.path}")
from dulwich.repo import Repo
from dulwich.diff import diff_tree_to_tree
import sys
repo = Repo('.')
# Compare two commits
commit1 = repo[b'HEAD~1']
commit2 = repo[b'HEAD']
# Generate unified diff
diff_output = diff_tree_to_tree(repo.object_store, commit1.tree, commit2.tree)
# Write to stdout
sys.stdout.buffer.write(diff_output)
from dulwich.repo import Repo
from dulwich.diff_tree import tree_changes, RenameDetector
repo = Repo('.')
# Create rename detector
rename_detector = RenameDetector(repo.object_store, rename_threshold=50)
# Get changes between commits
commit1 = repo[b'HEAD~1']
commit2 = repo[b'HEAD']
changes = list(tree_changes(repo.object_store, commit1.tree, commit2.tree))
# Detect renames
changes_with_renames = rename_detector.changes_with_renames(changes)
for change in changes_with_renames:
if change.is_rename:
print(f"Renamed: {change.old.path} -> {change.new.path}")
elif change.is_copy:
print(f"Copied: {change.old.path} -> {change.new.path}")
from dulwich.repo import Repo
from dulwich.diff import diff_working_tree_to_tree
repo = Repo('.')
# Compare working tree to HEAD
head_tree = repo[repo.head()].tree
diff_output = diff_working_tree_to_tree(repo.object_store, head_tree)
print(diff_output.decode('utf-8'))
from dulwich.repo import Repo
from dulwich.diff import diff_tree_to_tree, ColorizedDiffStream
import sys
repo = Repo('.')
# Create colorized output stream
colorized_stream = ColorizedDiffStream(sys.stdout.buffer)
# Generate colorized diff
commit1 = repo[b'HEAD~1']
commit2 = repo[b'HEAD']
diff_tree_to_tree(repo.object_store, commit1.tree, commit2.tree,
outf=colorized_stream)
Install with Tessl CLI
npx tessl i tessl/pypi-dulwich