Python bindings for libgit2 providing comprehensive Git repository operations and version control functionality.
—
Advanced Git operations including blame, stashing, submodules, filtering, object database access, and specialized repository operations. These features provide fine-grained control over Git functionality for complex workflows.
Track the origin of each line in a file with detailed authorship information.
class Blame:
@property
def hunks(self) -> list[BlameHunk]:
"""List of blame hunks"""
def __len__(self) -> int:
"""Number of blame hunks"""
def __getitem__(self, index: int) -> BlameHunk:
"""Get blame hunk by index"""
def __iter__(self):
"""Iterate over blame hunks"""
def for_line(self, line_no: int) -> BlameHunk:
"""Get blame hunk for specific line"""
class BlameHunk:
@property
def lines_in_hunk(self) -> int:
"""Number of lines in hunk"""
@property
def final_commit_id(self) -> Oid:
"""Commit that last modified these lines"""
@property
def final_start_line_number(self) -> int:
"""Starting line number in final file"""
@property
def final_signature(self) -> Signature:
"""Author signature from final commit"""
@property
def orig_commit_id(self) -> Oid:
"""Original commit that introduced these lines"""
@property
def orig_path(self) -> str:
"""Original file path"""
@property
def orig_start_line_number(self) -> int:
"""Starting line number in original file"""
@property
def orig_signature(self) -> Signature:
"""Author signature from original commit"""
@property
def boundary(self) -> bool:
"""True if hunk is at boundary of blame range"""
# Repository blame method
class Repository:
def blame(
self,
path: str,
flags: int = None,
min_match_characters: int = None,
newest_commit: Oid = None,
oldest_commit: Oid = None,
min_line: int = None,
max_line: int = None
) -> Blame:
"""
Generate blame information for file.
Parameters:
- path: File path to blame
- flags: Blame option flags
- min_match_characters: Minimum characters for copy detection
- newest_commit: Most recent commit to consider
- oldest_commit: Oldest commit to consider
- min_line: First line to blame (1-based)
- max_line: Last line to blame (1-based)
Returns:
Blame object
"""
# Blame Flag Constants
GIT_BLAME_NORMAL: int # Normal blame
GIT_BLAME_TRACK_COPIES_SAME_FILE: int # Track copies within file
GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES: int # Track moves in same commit
GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES: int # Track copies in same commit
GIT_BLAME_TRACK_COPIES_ANY_COMMIT_COPIES: int # Track copies across commits
GIT_BLAME_FIRST_PARENT: int # Follow only first parent
GIT_BLAME_USE_MAILMAP: int # Use mailmap for mapping
GIT_BLAME_IGNORE_WHITESPACE: int # Ignore whitespace changes
Save and restore working directory changes temporarily.
class Stash:
def save(
self,
stasher: Signature,
message: str,
flags: int = None,
paths: list[str] = None
) -> Oid:
"""
Save working directory to stash.
Parameters:
- stasher: Signature of person creating stash
- message: Stash message
- flags: Stash creation flags
- paths: Specific paths to stash (None = all)
Returns:
Stash commit OID
"""
def apply(
self,
index: int,
callbacks: 'StashApplyCallbacks' = None,
reinstate_index: bool = False
):
"""Apply stash to working directory"""
def pop(
self,
index: int,
callbacks: 'StashApplyCallbacks' = None,
reinstate_index: bool = False
):
"""Apply and remove stash"""
def drop(self, index: int):
"""Remove stash without applying"""
def __len__(self) -> int:
"""Number of stashes"""
def __getitem__(self, index: int) -> tuple[str, Oid]:
"""Get stash by index (message, oid)"""
def __iter__(self):
"""Iterate over stashes"""
# Repository stash access
class Repository:
@property
def stash(self) -> Stash:
"""Repository stash collection"""
# Stash Flag Constants
GIT_STASH_DEFAULT: int # Default behavior
GIT_STASH_KEEP_INDEX: int # Keep staged changes
GIT_STASH_INCLUDE_UNTRACKED: int # Include untracked files
GIT_STASH_INCLUDE_IGNORED: int # Include ignored files
GIT_STASH_KEEP_ALL: int # Keep all changes
# Stash Apply Flag Constants
GIT_STASH_APPLY_DEFAULT: int # Default apply
GIT_STASH_APPLY_REINSTATE_INDEX: int # Restore staged changes
class StashApplyCallbacks:
def apply_progress(self, progress: int) -> int:
"""Report apply progress"""
return 0
def checkout_notify(
self,
why: int,
path: str,
baseline: DiffFile,
target: DiffFile,
workdir: DiffFile
) -> int:
"""Checkout notification callback"""
return 0
Manage Git submodules for nested repository structures.
class Submodule:
@property
def name(self) -> str:
"""Submodule name"""
@property
def path(self) -> str:
"""Submodule path in repository"""
@property
def url(self) -> str:
"""Submodule remote URL"""
@property
def branch(self) -> str:
"""Submodule tracking branch"""
@property
def head_id(self) -> Oid:
"""OID of submodule HEAD"""
@property
def index_id(self) -> Oid:
"""OID of submodule in index"""
@property
def workdir_id(self) -> Oid:
"""OID of submodule in working directory"""
@property
def ignore_rule(self) -> int:
"""Submodule ignore rule"""
@property
def update_rule(self) -> int:
"""Submodule update rule"""
def init(self, overwrite: bool = False):
"""Initialize submodule"""
def clone(self, callbacks: 'RemoteCallbacks' = None) -> Repository:
"""Clone submodule repository"""
def update(
self,
init: bool = False,
callbacks: 'RemoteCallbacks' = None
):
"""Update submodule"""
def sync(self):
"""Sync submodule URL"""
def reload(self, force: bool = False):
"""Reload submodule from config"""
def status(self) -> int:
"""Get submodule status flags"""
def location(self) -> int:
"""Get submodule location flags"""
class SubmoduleCollection:
def add(
self,
url: str,
path: str,
use_gitlink: bool = True
) -> Submodule:
"""Add new submodule"""
def __getitem__(self, name: str) -> Submodule:
"""Get submodule by name or path"""
def __contains__(self, name: str) -> bool:
"""Check if submodule exists"""
def __iter__(self):
"""Iterate over submodules"""
def __len__(self) -> int:
"""Number of submodules"""
# Repository submodule access
class Repository:
@property
def submodules(self) -> SubmoduleCollection:
"""Repository submodules"""
# Submodule Status Constants
GIT_SUBMODULE_STATUS_IN_HEAD: int # Submodule in HEAD
GIT_SUBMODULE_STATUS_IN_INDEX: int # Submodule in index
GIT_SUBMODULE_STATUS_IN_CONFIG: int # Submodule in config
GIT_SUBMODULE_STATUS_IN_WD: int # Submodule in working directory
GIT_SUBMODULE_STATUS_INDEX_ADDED: int # Submodule added to index
GIT_SUBMODULE_STATUS_INDEX_DELETED: int # Submodule deleted from index
GIT_SUBMODULE_STATUS_INDEX_MODIFIED: int # Submodule modified in index
GIT_SUBMODULE_STATUS_WD_UNINITIALIZED: int # Submodule not initialized
GIT_SUBMODULE_STATUS_WD_ADDED: int # Submodule added to workdir
GIT_SUBMODULE_STATUS_WD_DELETED: int # Submodule deleted from workdir
GIT_SUBMODULE_STATUS_WD_MODIFIED: int # Submodule modified in workdir
# Submodule Ignore Constants
GIT_SUBMODULE_IGNORE_UNSPECIFIED: int # Use config setting
GIT_SUBMODULE_IGNORE_NONE: int # Don't ignore changes
GIT_SUBMODULE_IGNORE_UNTRACKED: int # Ignore untracked files
GIT_SUBMODULE_IGNORE_DIRTY: int # Ignore dirty working directory
GIT_SUBMODULE_IGNORE_ALL: int # Ignore all changes
Custom content filtering for clean/smudge operations.
class Filter:
def initialize(self):
"""Initialize filter"""
return 0
def shutdown(self):
"""Shutdown filter"""
def check(self, source: 'FilterSource', attr_values: list[str]) -> int:
"""Check if filter should apply"""
return 0
def apply(
self,
source: 'FilterSource',
to: 'FilterSource',
input_data: bytes
) -> bytes:
"""Apply filter transformation"""
return input_data
def stream(self, source: 'FilterSource', to: 'FilterSource'):
"""Stream-based filter application"""
pass
class FilterSource:
@property
def repository(self) -> Repository:
"""Source repository"""
@property
def path(self) -> str:
"""File path"""
@property
def file_mode(self) -> int:
"""File mode"""
@property
def oid(self) -> Oid:
"""File object ID"""
@property
def flags(self) -> int:
"""Filter flags"""
def filter_register(name: str, filter_obj: Filter, priority: int):
"""Register custom filter"""
def filter_unregister(name: str):
"""Unregister custom filter"""
# Filter Mode Constants
GIT_FILTER_TO_WORKTREE: int # Smudge (ODB -> workdir)
GIT_FILTER_TO_ODB: int # Clean (workdir -> ODB)
# Filter Flag Constants
GIT_FILTER_DEFAULT: int # Default behavior
GIT_FILTER_ALLOW_UNSAFE: int # Allow unsafe filters
Low-level access to Git's object database.
class Odb:
def read(self, oid: Oid) -> tuple[int, bytes]:
"""
Read object from database.
Returns:
Tuple of (object_type, data)
"""
def write(self, data: bytes, object_type: int) -> Oid:
"""Write object to database"""
def exists(self, oid: Oid) -> bool:
"""Check if object exists"""
def refresh(self):
"""Refresh object database"""
def add_backend(self, backend: 'OdbBackend', priority: int):
"""Add storage backend"""
def add_alternate(self, path: str, priority: int):
"""Add alternate object database"""
def __contains__(self, oid: Oid) -> bool:
"""Check if object exists"""
class OdbBackend:
"""Base class for ODB backends"""
pass
class OdbBackendLoose(OdbBackend):
def __init__(self, objects_dir: str, compression_level: int = -1):
"""Loose object backend"""
class OdbBackendPack(OdbBackend):
def __init__(self, objects_dir: str):
"""Pack file backend"""
# Repository ODB access
class Repository:
@property
def odb(self) -> Odb:
"""Object database"""
def init_file_backend(objects_dir: str) -> OdbBackend:
"""Initialize file-based ODB backend"""
Build and manage Git pack files for efficient storage.
class PackBuilder:
def __init__(self, repo: Repository):
"""Create pack builder"""
def insert(self, oid: Oid, name: str = None) -> int:
"""Insert object into pack"""
def insert_tree(self, oid: Oid) -> int:
"""Insert tree and all referenced objects"""
def insert_commit(self, oid: Oid) -> int:
"""Insert commit and all referenced objects"""
def insert_walk(self, walk: Walker) -> int:
"""Insert objects from walker"""
def write(self, path: str = None) -> bytes:
"""Write pack to file or return pack data"""
def set_threads(self, threads: int):
"""Set number of threads for packing"""
@property
def written_objects_count(self) -> int:
"""Number of objects written"""
Specialized repository operations for complex workflows.
class Repository:
# Cherry-pick and Revert
def cherry_pick(
self,
commit: Oid,
callbacks: 'CheckoutCallbacks' = None,
mainline: int = 0
):
"""Cherry-pick commit"""
def revert(
self,
commit: Oid,
callbacks: 'CheckoutCallbacks' = None,
mainline: int = 0
):
"""Revert commit"""
# Apply Operations
def apply(
self,
diff: Diff,
location: int = None,
callbacks: 'CheckoutCallbacks' = None
):
"""Apply diff/patch to repository"""
# History Operations
def descendant_of(self, commit: Oid, ancestor: Oid) -> bool:
"""Check if commit is descendant of ancestor"""
def ahead_behind(self, local: Oid, upstream: Oid) -> tuple[int, int]:
"""Count commits ahead/behind between branches"""
def merge_base(self, one: Oid, two: Oid) -> Oid:
"""Find merge base between commits"""
def merge_base_many(self, oids: list[Oid]) -> Oid:
"""Find merge base among multiple commits"""
# Worktree Operations
def create_worktree(
self,
name: str,
path: str,
reference: Reference = None
) -> Worktree:
"""Create new worktree"""
def worktrees(self) -> list[str]:
"""List worktree names"""
def lookup_worktree(self, name: str) -> Worktree:
"""Get worktree by name"""
class Worktree:
@property
def name(self) -> str:
"""Worktree name"""
@property
def path(self) -> str:
"""Worktree path"""
@property
def is_bare(self) -> bool:
"""True if worktree is bare"""
@property
def is_detached(self) -> bool:
"""True if HEAD is detached"""
@property
def is_locked(self) -> bool:
"""True if worktree is locked"""
def prune(self, flags: int = 0):
"""Prune worktree"""
def lock(self, reason: str = None):
"""Lock worktree"""
def unlock(self):
"""Unlock worktree"""
import pygit2
repo = pygit2.Repository('/path/to/repo')
# Generate blame for file
blame = repo.blame('src/main.py')
print(f"Blame has {len(blame)} hunks")
# Analyze each line
with open('src/main.py', 'r') as f:
lines = f.readlines()
line_no = 1
for hunk in blame:
for i in range(hunk.lines_in_hunk):
if line_no <= len(lines):
line_content = lines[line_no - 1].rstrip()
commit = repo[hunk.final_commit_id]
author = hunk.final_signature
print(f"{hunk.final_commit_id.hex[:8]} {author.name:<20} "
f"{line_no:4d}: {line_content}")
line_no += 1
# Blame specific line range
blame = repo.blame('README.md', min_line=10, max_line=20)
# Blame with copy detection
blame = repo.blame(
'renamed_file.py',
flags=pygit2.GIT_BLAME_TRACK_COPIES_ANY_COMMIT_COPIES
)
# Save working directory to stash
signature = pygit2.Signature('User', 'user@example.com')
stash_id = repo.stash.save(signature, 'Work in progress')
print(f"Created stash: {stash_id}")
# List stashes
print("Stashes:")
for i, (message, oid) in enumerate(repo.stash):
print(f" {i}: {message} ({oid.hex[:8]})")
# Apply most recent stash
if len(repo.stash) > 0:
repo.stash.apply(0)
# Pop stash (apply and remove)
if len(repo.stash) > 0:
repo.stash.pop(0)
# Stash with specific options
stash_id = repo.stash.save(
signature,
'Partial stash',
flags=pygit2.GIT_STASH_INCLUDE_UNTRACKED | pygit2.GIT_STASH_KEEP_INDEX,
paths=['src/*.py']
)
# Add submodule
submodule = repo.submodules.add(
'https://github.com/library/repo.git',
'lib/external'
)
# Initialize and clone submodule
submodule.init()
sub_repo = submodule.clone()
# Update submodule
submodule.update()
# Check submodule status
status = submodule.status()
if status & pygit2.GIT_SUBMODULE_STATUS_WD_MODIFIED:
print("Submodule has uncommitted changes")
# Iterate over submodules
for submodule in repo.submodules:
print(f"Submodule {submodule.name}: {submodule.url} -> {submodule.path}")
print(f" Status: {submodule.status()}")
class UppercaseFilter(pygit2.Filter):
def apply(self, source, to, input_data):
# Convert to uppercase when checking out
if to == pygit2.GIT_FILTER_TO_WORKTREE:
return input_data.upper()
# Convert to lowercase when checking in
else:
return input_data.lower()
# Register filter
pygit2.filter_register('uppercase', UppercaseFilter(), 100)
# Filter is applied based on .gitattributes:
# *.txt filter=uppercase
# Direct object access
odb = repo.odb
# Check if object exists
if odb.exists(some_oid):
obj_type, data = odb.read(some_oid)
print(f"Object type: {obj_type}, size: {len(data)}")
# Write object directly
blob_data = b"Hello, World!"
blob_oid = odb.write(blob_data, pygit2.GIT_OBJECT_BLOB)
# Add alternate object database
odb.add_alternate('/path/to/other/repo/.git/objects', 1)
# Create pack file
pack_builder = pygit2.PackBuilder(repo)
# Add objects to pack
for commit in repo.walk(repo.head.target):
pack_builder.insert(commit.oid)
pack_builder.insert_tree(commit.tree_id)
# Write pack
pack_data = pack_builder.write()
with open('custom.pack', 'wb') as f:
f.write(pack_data)
print(f"Packed {pack_builder.written_objects_count} objects")
# Find merge base
main_oid = repo.branches['main'].target
feature_oid = repo.branches['feature'].target
merge_base = repo.merge_base(main_oid, feature_oid)
print(f"Merge base: {merge_base}")
# Count commits ahead/behind
ahead, behind = repo.ahead_behind(feature_oid, main_oid)
print(f"Feature is {ahead} commits ahead, {behind} commits behind main")
# Check ancestry
if repo.descendant_of(feature_oid, main_oid):
print("Feature branch is descendant of main")
# Cherry-pick commit
commit_to_pick = repo.revparse_single('feature~3')
repo.cherry_pick(commit_to_pick.oid)
# Create worktree
worktree = repo.create_worktree('feature-test', '/tmp/feature-test')
print(f"Created worktree: {worktree.name} at {worktree.path}")
Install with Tessl CLI
npx tessl i tessl/pypi-pygit2