Pure Python implementation of the Git version control system providing comprehensive access to Git repositories without requiring the Git command-line tool
—
Complete implementation of Git's object model including blobs, trees, commits, and tags. These classes provide full read/write capabilities for Git's internal object formats with proper serialization and validation.
Includes utility functions for object manipulation, type checking, time/timezone parsing, tree operations, and GPG signing support.
Core type aliases and constants used throughout the objects module.
# Type aliases
ObjectID = bytes # 20-byte SHA-1 hash identifier
# Constants
ZERO_SHA = b"0" * 40 # Zero/null SHA-1 hash as hex string
S_IFGITLINK = 0o160000 # Git submodule file mode constant
MAX_TIME = 9223372036854775807 # Maximum valid timestamp (2^63 - 1)
BEGIN_PGP_SIGNATURE = b"-----BEGIN PGP SIGNATURE-----" # PGP signature marker

Runtime type checking functions for ShaFile objects with proper type narrowing support.
def is_commit(obj: ShaFile) -> TypeGuard[Commit]:
"""
Check if a ShaFile is a Commit object.
Args:
obj: ShaFile object to check
Returns:
True if object is a Commit, with type narrowing
"""
def is_tree(obj: ShaFile) -> TypeGuard[Tree]:
"""
Check if a ShaFile is a Tree object.
Args:
obj: ShaFile object to check
Returns:
True if object is a Tree, with type narrowing
"""
def is_blob(obj: ShaFile) -> TypeGuard[Blob]:
"""
Check if a ShaFile is a Blob object.
Args:
obj: ShaFile object to check
Returns:
True if object is a Blob, with type narrowing
"""
def is_tag(obj: ShaFile) -> TypeGuard[Tag]:
"""
Check if a ShaFile is a Tag object.
Args:
obj: ShaFile object to check
Returns:
True if object is a Tag, with type narrowing
"""

Core utility functions for object operations and format conversions.
def sha_to_hex(sha: ObjectID) -> bytes:
"""
Convert binary SHA-1 to hex string.
Args:
sha: 20-byte binary SHA-1
Returns:
40-byte hex representation as bytes
"""
def hex_to_sha(hex: Union[bytes, str]) -> bytes:
"""
Convert hex SHA-1 string to binary.
Args:
hex: 40-character hex string (bytes or str)
Returns:
20-byte binary SHA-1
Raises:
ValueError: If hex string is invalid
"""
def valid_hexsha(hex: Union[bytes, str]) -> bool:
"""
Check if string is a valid hex SHA-1.
Args:
hex: String to validate
Returns:
True if valid 40-character hex SHA-1
"""
def hex_to_filename(path: Union[str, bytes], hex: Union[str, bytes]) -> Union[str, bytes]:
"""
Convert hex SHA to Git object filename path.
Args:
path: Base objects directory path
hex: 40-character hex SHA-1
Returns:
Full path to object file (first 2 hex chars as dir, rest as filename)
"""
def filename_to_hex(filename: Union[str, bytes]) -> str:
"""
Extract hex SHA from Git object filename.
Args:
filename: Path to Git object file
Returns:
40-character hex SHA-1 string
"""
def object_header(num_type: int, length: int) -> bytes:
"""
Create Git object header for given type and content length.
Args:
num_type: Object type number (1=commit, 2=tree, 3=blob, 4=tag)
length: Content length in bytes
Returns:
Object header as bytes (e.g., b"blob 123\0")
"""
def object_class(type: Union[bytes, int]) -> Optional[type[ShaFile]]:
"""
Get ShaFile subclass for given type identifier.
Args:
type: Type name (b"commit", b"tree", etc.) or type number (1-4)
Returns:
ShaFile subclass or None if type unknown
"""
def check_hexsha(hex: Union[str, bytes], error_msg: str) -> None:
"""
Validate hex SHA-1 string, raising exception if invalid.
Args:
hex: Hex string to check
error_msg: Error message prefix for exception
Raises:
ObjectFormatException: If hex SHA is invalid
"""
def check_identity(identity: Optional[bytes], error_msg: str) -> None:
"""
Validate Git identity string format ("Name <email>").
Args:
identity: Identity string to validate
error_msg: Error message for exception
Raises:
ObjectFormatException: If identity format is invalid
"""
def check_time(time_seconds: int) -> None:
"""
Validate timestamp to prevent overflow errors.
Args:
time_seconds: Unix timestamp to validate
Raises:
ObjectFormatException: If timestamp too large (> MAX_TIME)
"""
def git_line(*items: bytes) -> bytes:
"""
Format items into space-separated Git header line.
Args:
items: Byte strings to join
Returns:
Space-separated line with trailing newline
"""
def S_ISGITLINK(m: int) -> bool:
"""
Check if file mode indicates a Git submodule.
Args:
m: Unix file mode to check
Returns:
True if mode indicates submodule (S_IFGITLINK)
"""

Utilities for parsing and formatting Git timestamp entries with timezone information.
def parse_timezone(text: bytes) -> tuple[int, bool]:
"""
Parse timezone offset string (e.g., "+0100", "-0500").
Args:
text: Timezone string starting with + or -
Returns:
Tuple of (offset_seconds, is_negative_utc)
- offset_seconds: Timezone offset in seconds from UTC
- is_negative_utc: True if "-0000" (negative UTC)
Raises:
ValueError: If timezone format is invalid
"""
def format_timezone(offset: int, unnecessary_negative_timezone: bool = False) -> bytes:
"""
Format timezone offset for Git serialization.
Args:
offset: Timezone offset in seconds from UTC
unnecessary_negative_timezone: Use minus sign for UTC/positive zones
Returns:
Formatted timezone string (e.g., b"+0100", b"-0500")
Raises:
ValueError: If offset not divisible by 60 seconds
"""
def parse_time_entry(value: bytes) -> tuple[bytes, Optional[int], tuple[Optional[int], bool]]:
"""
Parse Git time entry from author/committer/tagger line.
Args:
value: Git time entry ("Name <email> timestamp timezone")
Returns:
Tuple of (identity, timestamp, (timezone_offset, is_negative_utc))
Raises:
ObjectFormatException: If time entry format is invalid
"""
def format_time_entry(person: bytes, time: int, timezone_info: tuple[int, bool]) -> bytes:
"""
Format time entry for Git serialization.
Args:
person: Identity string ("Name <email>")
time: Unix timestamp
timezone_info: Tuple of (offset_seconds, is_negative_utc)
Returns:
Formatted time entry for Git object
"""

Functions for parsing, serializing, and manipulating Git tree objects.
def parse_tree(text: bytes, strict: bool = False) -> Iterator[tuple[bytes, int, bytes]]:
"""
Parse serialized tree object data.
Args:
text: Raw tree object bytes
strict: Enable strict mode validation (reject leading zeros in modes)
Yields:
Tuples of (name, mode, hexsha) for each tree entry
Raises:
ObjectFormatException: If tree format is invalid
"""
def serialize_tree(items: Iterable[tuple[bytes, int, bytes]]) -> Iterator[bytes]:
"""
Serialize tree entries to Git tree format.
Args:
items: Iterable of (name, mode, hexsha) tuples (must be sorted)
Yields:
Byte chunks of serialized tree data
"""
def sorted_tree_items(
entries: dict[bytes, tuple[int, bytes]],
name_order: bool
) -> Iterator[TreeEntry]:
"""
Iterate tree entries in correct Git sort order.
Args:
entries: Dictionary mapping names to (mode, sha) tuples
name_order: If True, sort by name; if False, use Git tree order
Yields:
TreeEntry objects in proper sort order
"""
def key_entry(entry: tuple[bytes, tuple[int, ObjectID]]) -> bytes:
"""
Generate sort key for tree entry in Git tree order.
Args:
entry: Tuple of (name, (mode, sha))
Returns:
Sort key (directories get "/" suffix)
"""
def key_entry_name_order(entry: tuple[bytes, tuple[int, ObjectID]]) -> bytes:
"""
Generate sort key for tree entry in name order.
Args:
entry: Tuple of (name, (mode, sha))
Returns:
Name as sort key
"""
def pretty_format_tree_entry(
name: bytes,
mode: int,
hexsha: bytes,
encoding: str = "utf-8"
) -> str:
"""
Format tree entry for human-readable display.
Args:
name: Entry name
mode: File mode
hexsha: Object SHA-1
encoding: Text encoding for name display
Returns:
Formatted string like "100644 blob abc123... filename"
"""

Specialized exceptions for Git object operations.
class EmptyFileException(FileFormatException):
"""
Raised when encountering unexpectedly empty Git object file.
This indicates a corrupted object or filesystem issue.
"""
class SubmoduleEncountered(Exception):
"""
Raised when submodule entry encountered during path traversal.
Contains information about the submodule path and commit SHA.
"""
def __init__(self, path: bytes, sha: ObjectID):
"""
Initialize submodule exception.
Args:
path: Path where submodule was encountered
sha: SHA-1 of submodule commit
"""
self.path = path
self.sha = sha

Foundation class for all Git objects with common functionality.
class ShaFile:
"""
Base class for Git objects with SHA-1 identification.
All Git objects (blobs, trees, commits, tags) inherit from this class
and share common serialization and identification methods.
"""
@property
def id(self) -> bytes:
"""
SHA-1 hash of object.
Returns:
Hex-encoded SHA-1 of the object as 40 ASCII bytes (not the 20-byte binary digest)
"""
@property
def type_name(self) -> bytes:
"""
Git object type name.
Returns:
Object type as bytes (b'blob', b'tree', b'commit', b'tag')
"""
def as_raw_string(self) -> bytes:
"""
Serialize object to Git's internal format.
Returns:
Raw bytes in Git object format
"""
def as_raw_chunks(self) -> Iterator[bytes]:
"""
Serialize object as chunks for streaming.
Yields:
Byte chunks of serialized object
"""
def sha(self) -> Union[FixedSha, "HASH"]:
"""
Get SHA-1 hash object.
Returns:
SHA1 hash object for this Git object (FixedSha or hashlib SHA1)
"""
@staticmethod
def from_raw_string(
type_num: int,
string: bytes,
sha: Optional[ObjectID] = None
) -> 'ShaFile':
"""
Create object from raw Git format data.
Parameters:
- type_num: Git object type number (1=commit, 2=tree, 3=blob, 4=tag)
- string: Raw serialized object data
- sha: Optional known SHA-1 for the object
Returns:
ShaFile subclass instance
"""
@staticmethod
def from_raw_chunks(
type_num: int,
chunks: list[bytes],
sha: Optional[ObjectID] = None
) -> 'ShaFile':
"""
Create object from raw Git format chunks.
Parameters:
- type_num: Git object type number
- chunks: List of raw data chunks
- sha: Optional known SHA-1 for the object
Returns:
ShaFile subclass instance
"""
@classmethod
def from_string(cls, string: bytes) -> 'ShaFile':
"""
Create ShaFile from serialized string.
Parameters:
- string: Serialized object data
Returns:
ShaFile instance
"""
@classmethod
def from_file(cls, f: Union[BufferedIOBase, IO[bytes], "_GitFile"]) -> 'ShaFile':
"""
Load ShaFile from file object.
Parameters:
- f: File-like object to read from
Returns:
ShaFile instance
Raises:
ObjectFormatException: If object format is invalid
"""
def check(self) -> None:
"""
Validate object format and contents.
Raises:
ObjectFormatException: If object format is invalid
ChecksumMismatch: If SHA-1 doesn't match contents
"""
def as_legacy_object_chunks(self, compression_level: int = -1) -> Iterator[bytes]:
"""
Serialize object in legacy Git format as chunks.
Parameters:
- compression_level: zlib compression level (-1 for default)
Yields:
Compressed byte chunks
"""
def as_legacy_object(self, compression_level: int = -1) -> bytes:
"""
Serialize object in legacy Git format.
Parameters:
- compression_level: zlib compression level (-1 for default)
Returns:
Compressed object data
"""
def set_raw_chunks(
self,
chunks: list[bytes],
sha: Optional[ObjectID] = None
) -> None:
"""
Set object contents from raw data chunks.
Parameters:
- chunks: List of raw data chunks
- sha: Optional known SHA-1 for the object
"""
def copy(self) -> 'ShaFile':
"""
Create copy of this object.
Returns:
New ShaFile instance with same contents
"""
def raw_length(self) -> int:
"""
Get length of raw serialized data.
Returns:
Total byte length of serialized object
"""
def __hash__(self) -> int:
"""
Hash based on object ID.
Returns:
Hash of object's SHA-1
"""
def __ne__(self, other: object) -> bool:
"""
Check if objects are not equal.
Parameters:
- other: Object to compare with
Returns:
True if objects have different SHA-1s
"""
def __eq__(self, other: object) -> bool:
"""
Check if objects are equal.
Parameters:
- other: Object to compare with
Returns:
True if objects have same SHA-1
"""
def __lt__(self, other: object) -> bool:
"""
Compare objects by SHA-1.
Parameters:
- other: ShaFile to compare with
Returns:
True if this object's SHA-1 is less than other's
Raises:
TypeError: If other is not a ShaFile
"""
def __le__(self, other: object) -> bool:
"""
Compare objects by SHA-1 (less than or equal).
Parameters:
- other: ShaFile to compare with
Returns:
True if this object's SHA-1 is less than or equal to other's
Raises:
TypeError: If other is not a ShaFile
"""

File content storage in Git repositories.
class Blob(ShaFile):
"""
Git blob object representing file contents.
Blobs store the raw content of files without any metadata
like filename, permissions, or directory structure.
"""
def __init__(self, data: bytes = b""):
"""
Create blob with file data.
Parameters:
- data: File content as bytes
"""
@property
def data(self) -> bytes:
"""
File content data.
Returns:
Raw file content as bytes
"""
@data.setter
def data(self, value: bytes) -> None:
"""
Set file content data.
Parameters:
- value: New file content as bytes
"""
@classmethod
def from_path(cls, path: Union[str, bytes]) -> 'Blob':
"""
Create blob from filesystem file.
Parameters:
- path: Path to file
Returns:
Blob object with file contents
Raises:
NotBlobError: If file is not a valid blob
"""
@property
def chunked(self) -> list[bytes]:
"""
Access blob data as chunks.
Returns:
List of data chunks (not necessarily lines)
"""
@chunked.setter
def chunked(self, chunks: list[bytes]) -> None:
"""
Set blob data from chunks.
Parameters:
- chunks: List of data chunks to set
"""
def splitlines(self) -> list[bytes]:
"""
Split blob data into lines.
Returns:
List of lines as bytes (preserves original line endings)
"""

Directory structure representation in Git repositories.
class Tree(ShaFile):
"""
Git tree object representing directory structure.
Trees contain entries for files and subdirectories with
their names, modes, and SHA-1 hashes.
"""
def __init__(self):
"""Create empty tree."""
def add(
self,
name: bytes,
mode: int,
hexsha: bytes
) -> None:
"""
Add entry to tree.
Parameters:
- name: File or directory name
- mode: Unix file mode (permissions and type)
- hexsha: SHA-1 hash of object
"""
def __getitem__(self, name: bytes) -> Tuple[int, bytes]:
"""
Get tree entry by name.
Parameters:
- name: Entry name
Returns:
Tuple of (mode, sha1_hash)
"""
def __setitem__(
self,
name: bytes,
value: Tuple[int, bytes]
) -> None:
"""
Set tree entry.
Parameters:
- name: Entry name
- value: Tuple of (mode, sha1_hash)
"""
def __delitem__(self, name: bytes) -> None:
"""
Remove entry from tree.
Parameters:
- name: Entry name to remove
"""
def __contains__(self, name: bytes) -> bool:
"""
Check if entry exists in tree.
Parameters:
- name: Entry name
Returns:
True if entry exists
"""
def items(self) -> Iterator[TreeEntry]:
"""
Iterate over tree entries.
Yields:
TreeEntry named tuples (path, mode, sha)
"""
def iteritems(self) -> Iterator[Tuple[bytes, int, bytes]]:
"""
Iterate over tree entries as tuples.
Yields:
Tuples of (name, mode, sha1_hash)
"""
@classmethod
def from_path(cls, filename: Union[str, bytes]) -> 'Tree':
"""
Create tree from Git object file.
Parameters:
- filename: Path to Git tree object file
Returns:
Tree object loaded from file
Raises:
NotTreeError: If file is not a valid tree
"""
def lookup_path(
self,
lookup_obj: Callable[[ObjectID], ShaFile],
path: bytes
) -> tuple[int, ObjectID]:
"""
Look up object at path within tree.
Parameters:
- lookup_obj: Function to retrieve objects by SHA-1
- path: Path within tree (may contain subdirectories)
Returns:
Tuple of (mode, sha1_hash) for object at path
Raises:
SubmoduleEncountered: If path crosses submodule boundary
NotTreeError: If intermediate path component is not a tree
"""
def as_pretty_string(self) -> str:
"""
Format tree as human-readable string.
Returns:
Multi-line string showing tree contents in ls-tree format
"""
def add(self, name: bytes, mode: int, hexsha: bytes) -> None:
"""
Add entry to tree.
Parameters:
- name: Entry name as bytes
- mode: Unix file mode
- hexsha: SHA-1 hash as hex bytes
"""
def iteritems(self, name_order: bool = False) -> Iterator[TreeEntry]:
"""
Iterate over tree entries.
Parameters:
- name_order: If True, sort by name instead of Git tree order
Yields:
TreeEntry objects for each entry
"""

Repository history and metadata storage.
class Commit(ShaFile):
"""
Git commit object representing repository snapshots.
Commits link trees with metadata including author, committer,
timestamp, message, and parent relationships.
"""
def __init__(self):
"""Create empty commit."""
@property
def message(self) -> bytes:
"""
Commit message.
Returns:
Commit message as bytes
"""
@message.setter
def message(self, value: bytes) -> None:
"""
Set commit message.
Parameters:
- value: Commit message as bytes
"""
@property
def author(self) -> bytes:
"""
Commit author information.
Returns:
Author in format b"Name <email>"
"""
@author.setter
def author(self, value: bytes) -> None:
"""
Set commit author.
Parameters:
- value: Author in format b"Name <email>"
"""
@property
def committer(self) -> bytes:
"""
Commit committer information.
Returns:
Committer in format b"Name <email>"
"""
@committer.setter
def committer(self, value: bytes) -> None:
"""
Set commit committer.
Parameters:
- value: Committer in format b"Name <email>"
"""
@property
def author_time(self) -> int:
"""
Author timestamp.
Returns:
Unix timestamp when authored
"""
@author_time.setter
def author_time(self, value: int) -> None:
"""
Set author timestamp.
Parameters:
- value: Unix timestamp
"""
@property
def commit_time(self) -> int:
"""
Commit timestamp.
Returns:
Unix timestamp when committed
"""
@commit_time.setter
def commit_time(self, value: int) -> None:
"""
Set commit timestamp.
Parameters:
- value: Unix timestamp
"""
@property
def author_timezone(self) -> int:
"""
Author timezone offset.
Returns:
Timezone offset in seconds
"""
@property
def commit_timezone(self) -> int:
"""
Commit timezone offset.
Returns:
Timezone offset in seconds
"""
@property
def tree(self) -> bytes:
"""
Tree SHA-1 hash.
Returns:
Hex SHA-1 (40 ASCII bytes) of the commit's root tree
"""
@tree.setter
def tree(self, value: bytes) -> None:
"""
Set tree SHA-1 hash.
Parameters:
- value: Hex SHA-1 of the tree (40 ASCII bytes), e.g. tree.id
"""
@property
def parents(self) -> List[bytes]:
"""
Parent commit SHA-1 hashes.
Returns:
List of parent commit SHA-1 hashes
"""
@parents.setter
def parents(self, value: List[bytes]) -> None:
"""
Set parent commit SHA-1 hashes.
Parameters:
- value: List of parent commit SHA-1 hashes
"""
@property
def encoding(self) -> bytes:
"""
Commit message encoding.
Returns:
Encoding name as bytes (e.g., b'utf-8')
"""
@encoding.setter
def encoding(self, value: bytes) -> None:
"""
Set commit message encoding.
Parameters:
- value: Encoding name as bytes
"""
@property
def gpgsig(self) -> Optional[bytes]:
"""
GPG signature for commit.
Returns:
GPG signature as bytes, or None if unsigned
"""
@gpgsig.setter
def gpgsig(self, value: Optional[bytes]) -> None:
"""
Set GPG signature.
Parameters:
- value: GPG signature as bytes or None
"""
@property
def extra(self) -> list[tuple[bytes, Optional[bytes]]]:
"""
Extra header fields not understood by this version.
Returns:
List of (field_name, field_value) tuples for unknown headers
"""
@property
def mergetag(self) -> list['Tag']:
"""
Associated signed tags for merge commits.
Returns:
List of Tag objects embedded in commit
"""
@mergetag.setter
def mergetag(self, value: list['Tag']) -> None:
"""
Set associated merge tags.
Parameters:
- value: List of Tag objects
"""
def sign(self, keyid: Optional[str] = None) -> None:
"""
Sign commit with GPG key.
Parameters:
- keyid: Optional GPG key ID to use for signing
Raises:
ImportError: If gpg module not available
"""
def verify(self, keyids: Optional[Iterable[str]] = None) -> None:
"""
Verify GPG signature on commit.
Parameters:
- keyids: Optional list of trusted key IDs
Raises:
gpg.errors.BadSignatures: If signature verification fails
gpg.errors.MissingSignatures: If not signed by trusted key
"""
def raw_without_sig(self) -> bytes:
"""
Get raw commit data without GPG signature.
Returns:
Raw serialized commit data excluding gpgsig field
"""

Named references to specific commits with optional annotation.
class Tag(ShaFile):
"""
Git tag object for marking specific commits.
Tags provide human-readable names for commits with optional
annotation including tagger, timestamp, and message.
"""
def __init__(self):
"""Create empty tag."""
@property
def object(self) -> Tuple[type[ShaFile], bytes]:
"""
Tagged object information.
Returns:
Tuple of (object_class, hex_sha), e.g. (Commit, commit.id)
"""
@object.setter
def object(self, value: Tuple[type[ShaFile], bytes]) -> None:
"""
Set tagged object.
Parameters:
- value: Tuple of (object_class, hex_sha), e.g. (Commit, commit.id)
"""
@property
def name(self) -> bytes:
"""
Tag name.
Returns:
Tag name as bytes
"""
@name.setter
def name(self, value: bytes) -> None:
"""
Set tag name.
Parameters:
- value: Tag name as bytes
"""
@property
def tagger(self) -> bytes:
"""
Tag tagger information.
Returns:
Tagger in format b"Name <email>"
"""
@tagger.setter
def tagger(self, value: bytes) -> None:
"""
Set tag tagger.
Parameters:
- value: Tagger in format b"Name <email>"
"""
@property
def tag_time(self) -> int:
"""
Tag creation timestamp.
Returns:
Unix timestamp when tag was created
"""
@tag_time.setter
def tag_time(self, value: int) -> None:
"""
Set tag timestamp.
Parameters:
- value: Unix timestamp
"""
@property
def tag_timezone(self) -> int:
"""
Tag timezone offset.
Returns:
Timezone offset in seconds
"""
@property
def message(self) -> bytes:
"""
Tag annotation message.
Returns:
Tag message as bytes
"""
@message.setter
def message(self, value: bytes) -> None:
"""
Set tag message.
Parameters:
- value: Tag message as bytes
"""
@property
def signature(self) -> Optional[bytes]:
"""
Tag GPG signature.
Returns:
GPG signature as bytes, or None if unsigned
"""
@signature.setter
def signature(self, value: Optional[bytes]) -> None:
"""
Set GPG signature.
Parameters:
- value: GPG signature as bytes or None
"""
def sign(self, keyid: Optional[str] = None) -> None:
"""
Sign tag with GPG key.
Parameters:
- keyid: Optional GPG key ID to use for signing
Raises:
ImportError: If gpg module not available
"""
def verify(self, keyids: Optional[Iterable[str]] = None) -> None:
"""
Verify GPG signature on tag.
Parameters:
- keyids: Optional list of trusted key IDs
Raises:
gpg.errors.BadSignatures: If signature verification fails
gpg.errors.MissingSignatures: If not signed by trusted key
"""
def raw_without_sig(self) -> bytes:
"""
Get raw tag data without GPG signature.
Returns:
Raw serialized tag data excluding signature
"""

Supporting classes and types for object manipulation.
class TreeEntry(namedtuple("TreeEntry", ["path", "mode", "sha"])):
"""
Named tuple representing a Git tree entry.
Fields:
- path: Entry name/path as bytes
- mode: Unix file mode (int)
- sha: SHA-1 hash as bytes
"""
def in_path(self, path: bytes) -> 'TreeEntry':
"""
Return copy of entry with given path prepended.
Parameters:
- path: Path prefix to prepend
Returns:
New TreeEntry with combined path
Raises:
TypeError: If path is not bytes
"""
class FixedSha:
"""
SHA-1 object with predetermined fixed value.
Used for objects where SHA-1 is known in advance,
avoiding need for recalculation during serialization.
"""
def __init__(self, hexsha: Union[str, bytes]) -> None:
"""
Initialize with hex SHA-1 string.
Parameters:
- hexsha: 40-character hex SHA-1 (str or bytes)
Raises:
TypeError: If hexsha is not str or bytes
"""
def digest(self) -> bytes:
"""
Get binary SHA-1 digest.
Returns:
20-byte binary SHA-1
"""
def hexdigest(self) -> str:
"""
Get hex SHA-1 digest.
Returns:
40-character hex string
"""

from dulwich.objects import Blob
# Create blob from data
blob = Blob(b"Hello, world!\n")
print(f"Blob SHA-1: {blob.id.decode('ascii')}")
# Create blob from file
blob = Blob.from_path("/path/to/file.txt")
print(f"File size: {len(blob.data)} bytes")
# Process blob data
lines = blob.splitlines()
for i, line in enumerate(lines):
print(f"Line {i}: {line.decode('utf-8', errors='replace')}")

from dulwich.objects import Tree, Blob
# Create tree with files
tree = Tree()
# Add file blob
file_blob = Blob(b"File content")
tree.add(b"file.txt", 0o100644, file_blob.id)
# Add subdirectory
subtree = Tree()
subtree.add(b"subfile.txt", 0o100644, Blob(b"Subfile content").id)
tree.add(b"subdir", 0o040000, subtree.id)
# Iterate tree entries
for entry in tree.items():
print(f"{entry.path.decode('utf-8')}: {entry.mode:o} {entry.sha.decode('ascii')}")

from dulwich.objects import Commit, Tree, Blob
import time
# Create commit
commit = Commit()
commit.tree = tree.id
commit.author = b"John Doe <john@example.com>"
commit.committer = b"John Doe <john@example.com>"
commit.author_time = int(time.time())
commit.commit_time = int(time.time())
commit.author_timezone = 0
commit.commit_timezone = 0
commit.message = b"Initial commit\n"
commit.parents = [] # No parents for initial commit
print(f"Commit SHA-1: {commit.id.decode('ascii')}")

from dulwich.objects import Tag
import time
# Create annotated tag
tag = Tag()
tag.object = (Commit, commit.id) # Tag a commit: (object class, hex SHA)
tag.name = b"v1.0.0"
tag.tagger = b"Release Manager <release@example.com>"
tag.tag_time = int(time.time())
tag.tag_timezone = 0
tag.message = b"Version 1.0.0 release\n"
print(f"Tag SHA-1: {tag.id.decode('ascii')}")

from dulwich.objects import (
hex_to_sha, sha_to_hex, valid_hexsha,
parse_timezone, format_timezone,
is_commit, is_tree, is_blob, is_tag
)
# Working with SHA-1 conversions
hex_sha = b"1234567890abcdef1234567890abcdef12345678"
binary_sha = hex_to_sha(hex_sha)
back_to_hex = sha_to_hex(binary_sha)
# Validate SHA-1 strings
if valid_hexsha(hex_sha):
print("Valid SHA-1")
# Parse timezone information
timezone_str = b"+0530" # India Standard Time
offset_seconds, is_negative_utc = parse_timezone(timezone_str)
print(f"Offset: {offset_seconds} seconds, Negative UTC: {is_negative_utc}")
# Format timezone back
formatted_tz = format_timezone(offset_seconds, is_negative_utc)
# Type checking with type guards
obj = Blob(b"some data")
if is_blob(obj):
# obj is now typed as Blob
print(f"Blob size: {len(obj.data)}")

from dulwich.objects import Blob, SubmoduleEncountered, Tree, parse_tree, serialize_tree, sorted_tree_items
# Parse raw tree data
tree_data = b"100644 file.txt\x00\x12\x34\x56..." # Raw tree bytes
entries = list(parse_tree(tree_data))
for name, mode, hexsha in entries:
print(f"{name.decode('utf-8')}: mode {mode:o}, SHA {hexsha.decode('ascii')}")
# Work with tree entries in proper order
tree = Tree()
tree.add(b"file1.txt", 0o100644, b"abc123" * 20)
tree.add(b"dir", 0o040000, b"def456" * 20)
# Get entries in Git tree order (directories sorted with trailing /)
for entry in sorted_tree_items(tree._entries, name_order=False):
print(f"{entry.path.decode('utf-8')}: {entry.mode:o}")
# Look up path in tree
def lookup_object(sha):
# Mock lookup function - in practice, use repo object store
return Tree() if sha == b"def456" * 20 else Blob(b"content")
try:
mode, sha = tree.lookup_path(lookup_object, b"dir/subfile.txt")
print(f"Found object: mode {mode:o}, SHA {sha.decode('ascii')}")
except SubmoduleEncountered as e:
print(f"Encountered submodule at {e.path}")

# Sign a commit (requires gpg module)
try:
commit.sign() # Uses default GPG key
print("Commit signed successfully")
# Verify signature
commit.verify()
print("Signature verified")
# Get commit data without signature for external verification
unsigned_data = commit.raw_without_sig()
except ImportError:
print("GPG module not available")
except Exception as e:
print(f"GPG operation failed: {e}")
# Same operations work for tags
try:
tag.sign("specific-key-id")
tag.verify(["trusted-key-1", "trusted-key-2"])
except Exception as e:
print(f"Tag signing/verification failed: {e}")

Install with Tessl CLI
npx tessl i tessl/pypi-dulwich