CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-pygit2

Python bindings for libgit2 providing comprehensive Git repository operations and version control functionality.

Pending
Overview
Eval results
Files

diff.mddocs/

Diff and Patches

Difference analysis between Git objects, the working directory, and the index. This module provides detailed change information and patch generation, with support for similarity detection, custom diff options, and patch application.

Capabilities

Diff Creation

Generate diffs between various Git entities including commits, trees, index, and working directory.

class Diff:
    @property
    def deltas(self) -> list[DiffDelta]:
        """List of file differences"""
    
    @property
    def stats(self) -> DiffStats:
        """Diff statistics"""
    
    def patch(self) -> str:
        """Generate unified diff patch"""
    
    def find_similar(
        self,
        flags: int = None,
        rename_threshold: int = 50,
        copy_threshold: int = 50,
        rename_from_rewrite_threshold: int = 50,
        break_rewrite_threshold: int = 60,
        rename_limit: int = 1000
    ):
        """
        Find renamed and copied files.
        
        Parameters:
        - flags: Similarity detection flags
        - rename_threshold: Threshold for rename detection (0-100)
        - copy_threshold: Threshold for copy detection (0-100)
        - rename_from_rewrite_threshold: Threshold for rename from rewrite
        - break_rewrite_threshold: Threshold for breaking rewrites
        - rename_limit: Maximum files to consider for renames
        """
    
    def merge(self, other: 'Diff') -> 'Diff':
        """Merge two diffs"""
    
    def __len__(self) -> int:
        """Number of deltas"""
    
    def __iter__(self):
        """Iterate over deltas"""
    
    def __getitem__(self, index: int) -> 'DiffDelta':
        """Get delta by index"""

# Repository diff methods
class Repository:
    def diff(
        self,
        a: Object | str = None,
        b: Object | str = None,
        cached: bool = False,
        flags: int = 0,
        context_lines: int = 3,
        interhunk_lines: int = 0,
        pathspecs: list[str] = None,
        max_size: int = 0,
        swap: bool = False
    ) -> Diff:
        """
        Create diff between objects.
        
        Parameters:
        - a: Source object (None = empty tree)
        - b: Target object (None = working directory, cached = index)
        - cached: Compare with index instead of working directory
        - flags: Diff option flags
        - context_lines: Lines of context around changes
        - interhunk_lines: Lines between hunks to merge them
        - pathspecs: Limit diff to specific paths
        - max_size: Maximum file size to diff
        - swap: Swap source and target
        
        Returns:
        Diff object
        """

Diff Deltas

DiffDelta represents the difference information for a single file.

class DiffDelta:
    """Difference information for a single file (API stub, signatures only).

    DiffFile is quoted in the annotations because it is declared after
    this class.
    """

    @property
    def status(self) -> int:
        """Change status, one of the GIT_DELTA_* constants (added, deleted, modified, etc.)"""

    @property
    def flags(self) -> int:
        """Delta flags"""

    @property
    def similarity(self) -> int:
        """Similarity score for renames/copies (0-100)"""

    @property
    def nfiles(self) -> int:
        """Number of files in delta (1 or 2)"""

    @property
    def old_file(self) -> 'DiffFile':
        """Source ("from") side file information"""

    @property
    def new_file(self) -> 'DiffFile':
        """Target ("to") side file information"""

    @property
    def is_binary(self) -> bool:
        """True if file is binary"""

class DiffFile:
    """One side (old or new) of a DiffDelta (API stub, signatures only)."""

    @property
    def path(self) -> str:
        """File path"""

    @property
    def size(self) -> int:
        """File size"""

    @property
    def flags(self) -> int:
        """File flags"""

    @property
    def mode(self) -> int:
        """File mode"""

    @property
    def id(self) -> 'Oid':
        """
        File object ID.

        pygit2 names this attribute `id`; DiffFile has no `oid` attribute.
        """

# Delta Status Constants (the values compared against DiffDelta.status)
GIT_DELTA_UNMODIFIED: int   # No change
GIT_DELTA_ADDED: int        # File added (does not exist on the old side)
GIT_DELTA_DELETED: int      # File deleted (does not exist on the new side)
GIT_DELTA_MODIFIED: int     # File content modified
GIT_DELTA_RENAMED: int      # File renamed (reported after similarity detection)
GIT_DELTA_COPIED: int       # File copied (reported after similarity detection)
GIT_DELTA_IGNORED: int      # File is an ignored item in the working directory
GIT_DELTA_UNTRACKED: int    # File is an untracked item in the working directory
GIT_DELTA_TYPECHANGE: int   # Entry type changed between old and new
GIT_DELTA_UNREADABLE: int   # File unreadable
GIT_DELTA_CONFLICTED: int   # File is conflicted in the index

Patches

Patch objects provide detailed line-by-line difference information.

class Patch:
    """Line-by-line changes for a single file (API stub, signatures only).

    Project types are quoted in the annotations so this stub does not
    depend on the declaration order of the other classes.
    """

    @property
    def delta(self) -> 'DiffDelta':
        """Associated diff delta"""

    @property
    def hunks(self) -> 'list[DiffHunk]':
        """List of diff hunks"""

    @property
    def line_stats(self) -> tuple[int, int, int]:
        """Line statistics (context, additions, deletions)"""

    @property
    def data(self) -> bytes:
        """Raw bytes of the patch (a property in pygit2, not a method)"""

    @property
    def text(self) -> 'str | None':
        """Patch as a decoded string"""

    def __str__(self) -> str:
        """String representation"""

class DiffHunk:
    """One contiguous group of changed lines in a Patch (API stub).

    DiffLine is quoted in the annotation because it is declared after
    this class.
    """

    @property
    def old_start(self) -> int:
        """Line at which the hunk starts in the old file"""

    @property
    def old_lines(self) -> int:
        """Number of lines the hunk covers in the old file"""

    @property
    def new_start(self) -> int:
        """Line at which the hunk starts in the new file"""

    @property
    def new_lines(self) -> int:
        """Number of lines the hunk covers in the new file"""

    @property
    def header(self) -> str:
        """Hunk header"""

    @property
    def lines(self) -> 'list[DiffLine]':
        """List of diff lines"""

class DiffLine:
    """A single line inside a DiffHunk (API stub, signatures only)."""

    @property
    def origin(self) -> str:
        """Marker telling which side the line belongs to ('+', '-', ' ')"""

    @property
    def old_lineno(self) -> int:
        """Position of the line in the old file"""

    @property
    def new_lineno(self) -> int:
        """Position of the line in the new file"""

    @property
    def num_lines(self) -> int:
        """Count of newline characters in the content"""

    @property
    def content(self) -> str:
        """Text of the line"""

    @property
    def raw_content(self) -> bytes:
        """Undecoded bytes of the line"""

Diff Statistics

DiffStats provides summary information about changes in a diff.

class DiffStats:
    """Summary counters for a diff (API stub, signatures only)."""

    @property
    def files_changed(self) -> int:
        """How many files were changed"""

    @property
    def insertions(self) -> int:
        """How many lines were inserted"""

    @property
    def deletions(self) -> int:
        """How many lines were deleted"""

    def format(self, format_flags: int, width: int = 80) -> str:
        """
        Render the statistics as text.

        Parameters:
        - format_flags: Formatting options (GIT_DIFF_STATS_* constants)
        - width: Output width

        Returns:
        Formatted statistics string
        """

# Diff Stats Format Constants (combine with | and pass to DiffStats.format)
GIT_DIFF_STATS_NONE: int            # No stats
GIT_DIFF_STATS_FULL: int            # Full stats with graph (like `git diff --stat`)
GIT_DIFF_STATS_SHORT: int           # Short format (like `git diff --shortstat`)
GIT_DIFF_STATS_NUMBER: int          # Numbers only (like `git diff --numstat`)
GIT_DIFF_STATS_INCLUDE_SUMMARY: int # Include summary of extended changes (creations, renames, mode changes)

Diff Options

Constants for controlling diff generation behavior.

# Basic Diff Options (passed as `flags` when creating a diff)
GIT_DIFF_NORMAL: int                # Standard diff (the default)
GIT_DIFF_REVERSE: int               # Reverse the two sides of the diff
GIT_DIFF_INCLUDE_IGNORED: int       # Include ignored files
GIT_DIFF_RECURSE_IGNORED_DIRS: int  # List files inside ignored dirs individually
GIT_DIFF_INCLUDE_UNTRACKED: int     # Include untracked files
GIT_DIFF_RECURSE_UNTRACKED_DIRS: int # List files inside untracked dirs individually
GIT_DIFF_INCLUDE_UNMODIFIED: int    # Include unmodified files
GIT_DIFF_INCLUDE_TYPECHANGE: int    # Report type changes as TYPECHANGE instead of delete + add
GIT_DIFF_INCLUDE_TYPECHANGE_TREES: int # Also report blob <-> tree transitions as TYPECHANGE
GIT_DIFF_IGNORE_FILEMODE: int       # Ignore file mode changes
GIT_DIFF_IGNORE_SUBMODULES: int     # Treat all submodules as unmodified
GIT_DIFF_IGNORE_CASE: int           # Case-insensitive filename comparison
GIT_DIFF_DISABLE_PATHSPEC_MATCH: int # Treat pathspec entries as literal paths, not patterns
GIT_DIFF_SKIP_BINARY_CHECK: int     # Skip the binary check (do not update the binary flag)
GIT_DIFF_ENABLE_FAST_UNTRACKED_DIRS: int # Mark untracked dirs without scanning their contents

# Content Options
GIT_DIFF_FORCE_TEXT: int            # Treat all files as text
GIT_DIFF_FORCE_BINARY: int          # Treat all files as binary
GIT_DIFF_IGNORE_WHITESPACE: int     # Ignore all whitespace
GIT_DIFF_IGNORE_WHITESPACE_CHANGE: int # Ignore changes in the amount of whitespace
GIT_DIFF_IGNORE_WHITESPACE_EOL: int # Ignore whitespace at end of line
GIT_DIFF_SHOW_UNTRACKED_CONTENT: int # Include the content of untracked files in patches
GIT_DIFF_SHOW_UNMODIFIED: int       # Show unmodified files in generated output
GIT_DIFF_PATIENCE: int              # Use patience diff algorithm
GIT_DIFF_MINIMAL: int               # Spend extra time to minimize diff size
GIT_DIFF_SHOW_BINARY: int           # Emit binary diffs that can be applied

# Similarity Detection Options (passed as `flags` to Diff.find_similar)
GIT_DIFF_FIND_RENAMES: int                    # Find renames
GIT_DIFF_FIND_COPIES: int                     # Find copies
GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED: int     # Also consider unmodified files as copy sources
GIT_DIFF_FIND_REWRITES: int                   # Mark significant rewrites for splitting
GIT_DIFF_BREAK_REWRITES: int                  # Split large rewrites into delete/add pairs
GIT_DIFF_FIND_AND_BREAK_REWRITES: int         # Find and break rewrites
GIT_DIFF_FIND_FOR_UNTRACKED: int              # Find renames/copies for untracked files
GIT_DIFF_FIND_ALL: int                        # Turn on all finding features
GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE: int  # Ignore leading whitespace when comparing
GIT_DIFF_FIND_IGNORE_WHITESPACE: int          # Ignore all whitespace when comparing
GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE: int     # Don't ignore whitespace when comparing
GIT_DIFF_FIND_EXACT_MATCH_ONLY: int           # Exact matches only
GIT_DIFF_FIND_REMOVE_UNMODIFIED: int          # Remove unmodified from output

Usage Examples

Basic Diff Operations

import pygit2

repo = pygit2.Repository('/path/to/repo')

# Diff HEAD to index (staged changes). 'HEAD' is passed explicitly so that
# cached=True has a source tree to compare the index against.
diff = repo.diff('HEAD', cached=True)
print(f"Staged changes: {len(diff)} files")

# Diff HEAD to working directory
diff = repo.diff('HEAD')
print(f"Working directory changes: {len(diff)} files")

# Diff between commits
commit1 = repo.revparse_single('HEAD~5')
commit2 = repo.revparse_single('HEAD')
diff = repo.diff(commit1, commit2)
print(f"Changes in last 5 commits: {len(diff)} files")

# Diff between branches: peel each branch to the commit it points at, so
# repo.diff() receives objects rather than bare object IDs
main_commit = repo.branches['main'].peel()
feature_commit = repo.branches['feature'].peel()
diff = repo.diff(main_commit, feature_commit)

Analyzing Diff Details

# Display names for the delta status codes, built once instead of on
# every loop iteration
status_names = {
    pygit2.GIT_DELTA_ADDED: "Added",
    pygit2.GIT_DELTA_DELETED: "Deleted",
    pygit2.GIT_DELTA_MODIFIED: "Modified",
    pygit2.GIT_DELTA_RENAMED: "Renamed",
    pygit2.GIT_DELTA_COPIED: "Copied",
    pygit2.GIT_DELTA_TYPECHANGE: "Type changed"
}

# Examine each changed file
for delta in diff.deltas:
    status = status_names.get(delta.status, "Unknown")
    old_path = delta.old_file.path
    new_path = delta.new_file.path

    # Only renames show both paths and the similarity score
    if delta.status == pygit2.GIT_DELTA_RENAMED:
        print(f"{status}: {old_path} -> {new_path} ({delta.similarity}%)")
    else:
        print(f"{status}: {new_path}")

    if delta.is_binary:
        print("  (binary file)")
    else:
        print(f"  Size: {delta.old_file.size} -> {delta.new_file.size} bytes")

Working with Patches

# Generate patches for each file. Iterating over a Diff yields one Patch
# per changed file; DiffDelta itself has no `patch` attribute.
for patch in diff:
    delta = patch.delta
    print(f"\n=== {delta.new_file.path} ===")
    print(f"Statistics: {patch.line_stats}")

    # Show detailed line changes
    for hunk in patch.hunks:
        print(f"\n@@ -{hunk.old_start},{hunk.old_lines} +{hunk.new_start},{hunk.new_lines} @@")
        print(hunk.header)

        for line in hunk.lines:
            marker = line.origin
            content = line.content.rstrip('\n')
            # Fall back to the old line number for lines absent from the new file
            line_num = line.new_lineno if line.new_lineno > 0 else line.old_lineno
            print(f"{marker}{line_num:4d}: {content}")

# Generate unified patch. Diff.patch is a property (not a method) and can
# be None when there is nothing to write.
patch_text = diff.patch
if patch_text:
    with open('changes.patch', 'w', encoding='utf-8') as f:
        f.write(patch_text)

Diff Statistics

# Summary counters for the whole diff
stats = diff.stats
print(f"Files changed: {stats.files_changed}")
print(f"Insertions: {stats.insertions}")
print(f"Deletions: {stats.deletions}")

# Render the statistics as text: full graph plus the summary section
format_flags = pygit2.GIT_DIFF_STATS_FULL | pygit2.GIT_DIFF_STATS_INCLUDE_SUMMARY
stats_text = stats.format(format_flags, width=80)
print(stats_text)

Advanced Diff Options

# Ignore whitespace changes
diff = repo.diff(
    'HEAD~1',
    'HEAD',
    flags=pygit2.GIT_DIFF_IGNORE_WHITESPACE_CHANGE
)

# Include untracked files
diff = repo.diff(
    flags=pygit2.GIT_DIFF_INCLUDE_UNTRACKED
)

# Custom context lines
diff = repo.diff(
    'HEAD~1',
    'HEAD',
    context_lines=10
)

# Limit to specific paths.
# NOTE(review): the pygit2 reference signature of Repository.diff() has no
# pathspec argument, so the deltas are filtered after the diff is built.
# Confirm against the installed pygit2 version.
diff = repo.diff('HEAD~1', 'HEAD')
selected_deltas = [
    delta for delta in diff.deltas
    if delta.new_file.path.endswith('.py')
    or delta.new_file.path.startswith('docs/')
]

Similarity Detection

# Detect renames and copies between the last two commits
diff = repo.diff('HEAD~1', 'HEAD')
find_flags = pygit2.GIT_DIFF_FIND_RENAMES | pygit2.GIT_DIFF_FIND_COPIES
diff.find_similar(flags=find_flags, rename_threshold=50, copy_threshold=70)

# Report every delta that was classified as a rename or a copy
similarity_labels = {
    pygit2.GIT_DELTA_RENAMED: "Renamed",
    pygit2.GIT_DELTA_COPIED: "Copied",
}
for delta in diff.deltas:
    label = similarity_labels.get(delta.status)
    if label is None:
        continue
    print(f"{label}: {delta.old_file.path} -> {delta.new_file.path}")
    print(f"Similarity: {delta.similarity}%")

Tree and Index Diffs

# Compare trees directly. Revision expressions such as 'HEAD~1' must be
# resolved with revparse_single(); repo[...] only looks up object IDs.
tree1 = repo.revparse_single('HEAD~1').tree
tree2 = repo.revparse_single('HEAD').tree
diff = tree1.diff_to_tree(tree2)

# Compare tree to working directory
tree = repo.revparse_single('HEAD').tree
diff = tree.diff_to_workdir()

# Compare tree to index (the index to compare against must be passed in)
diff = tree.diff_to_index(repo.index)

# Compare index to working directory
diff = repo.index.diff_to_workdir()

Install with Tessl CLI

npx tessl i tessl/pypi-pygit2

docs

advanced.md

auth.md

config.md

diff.md

index.md

objects.md

references.md

remotes.md

repository.md

staging.md

tile.json