Pure Python implementation of the Git version control system providing comprehensive access to Git repositories without requiring the Git command-line tool
—
Network protocol implementations for communicating with Git servers over HTTP, SSH, and Git protocols with authentication and progress tracking.
Abstract base classes and common functionality for Git protocol clients.
class GitClient:
"""Abstract base class for Git protocol clients."""
def fetch_pack(
self,
path: str,
determine_wants: Callable,
graph_walker: object,
pack_data: Callable,
progress: Optional[Callable] = None,
depth: Optional[int] = None
) -> FetchPackResult:
"""
Fetch a pack from the remote repository.
Args:
path: Repository path on remote
determine_wants: Function to determine wanted objects
graph_walker: Object graph walker
pack_data: Callback for pack data
progress: Optional progress callback
depth: Optional shallow clone depth
Returns:
FetchPackResult with refs and symrefs
"""
def send_pack(
self,
path: str,
determine_wants: Callable,
generate_pack_data: Callable,
progress: Optional[Callable] = None
) -> SendPackResult:
"""
Send a pack to the remote repository.
Args:
path: Repository path on remote
determine_wants: Function to determine wanted objects
generate_pack_data: Function to generate pack data
progress: Optional progress callback
Returns:
SendPackResult with reference update status
"""
def get_refs(self, path: str) -> Dict[bytes, bytes]:
"""
Get references from remote repository.
Args:
path: Repository path on remote
Returns:
Dictionary mapping ref names to SHAs
"""
def archive(
self,
path: str,
committish: Optional[str] = None,
write_data: Optional[Callable] = None,
progress: Optional[Callable] = None,
write_error: Optional[Callable] = None,
) -> None:
"""
Retrieve an archive from remote repository.
Args:
path: Repository path on remote
committish: Commit-ish to archive
write_data: Function to write archive data
progress: Optional progress callback
write_error: Function to write error messages
"""
def close(self) -> None:
"""
Close the client and clean up resources.
"""
class TraditionalGitClient(GitClient):
"""Traditional Git protocol client implementation.
Provides common functionality for Git protocol clients
that use the traditional Git wire protocol.
"""
def __init__(
self,
can_read: Optional[Callable] = None,
read: Optional[Callable] = None,
write: Optional[Callable] = None,
**kwargs
) -> None:
"""
Initialize traditional Git client.
Args:
can_read: Function to check if data can be read
read: Function to read data
write: Function to write data
"""
Client implementations for different network protocols.
class TCPGitClient(TraditionalGitClient):
"""TCP-based Git protocol client.
Connects to a Git daemon over a TCP socket on the specified
host and port.
"""
def __init__(
self,
host: str,
port: Optional[int] = None,
**kwargs
) -> None:
"""
Initialize TCP Git client.
Args:
host: Git server hostname
port: Git server port (default: 9418)
"""
class SubprocessGitClient(TraditionalGitClient):
"""Subprocess-based Git client using local git command.
Executes local git commands in a subprocess to communicate
with remote repositories.
"""
def __init__(self, **kwargs) -> None:
"""
Initialize subprocess Git client.
"""
class LocalGitClient(GitClient):
"""Local filesystem Git client.
Accesses Git repositories directly on the local filesystem
without network communication.
"""
def __init__(self, **kwargs) -> None:
"""
Initialize local Git client.
"""
class SSHGitClient(TraditionalGitClient):
"""SSH-based Git protocol client.
Connects to Git repositories over SSH using configurable
SSH vendor implementations.
"""
def __init__(
self,
host: str,
port: Optional[int] = None,
username: Optional[str] = None,
vendor: Optional["SSHVendor"] = None,
**kwargs
) -> None:
"""
Initialize SSH Git client.
Args:
host: SSH server hostname
port: SSH server port (default: 22)
username: SSH username
vendor: SSH vendor implementation
"""
class BundleClient(GitClient):
"""Git bundle client for reading/writing Git bundles.
Handles Git bundle format files that contain packaged
Git objects and references.
"""
def __init__(self, bundle_file: Union[str, IO]) -> None:
"""
Initialize bundle client.
Args:
bundle_file: Path to bundle file or file-like object
"""
HTTP-based Git protocol clients with authentication support.
class AbstractHttpGitClient(GitClient):
"""Abstract HTTP Git protocol client.
Base class for HTTP-based Git protocol implementations
supporting smart HTTP protocol.
"""
def __init__(
self,
base_url: str,
dumb: Optional[bool] = None,
**kwargs
) -> None:
"""
Initialize HTTP Git client.
Args:
base_url: Base URL for Git repository
dumb: Force dumb HTTP protocol
"""
def _http_request(
self,
url: str,
headers: Optional[Dict[str, str]] = None,
data: Optional[bytes] = None
) -> object:
"""
Make HTTP request.
Args:
url: Request URL
headers: Optional HTTP headers
data: Optional request body data
Returns:
HTTP response object
"""
def _discover_references(
self,
service: str,
base_url: str
) -> tuple[Dict[bytes, bytes], Dict[bytes, bytes]]:
"""
Discover references from HTTP endpoint.
Args:
service: Git service name
base_url: Repository base URL
Returns:
Tuple of (refs, symrefs)
"""
class Urllib3HttpGitClient(AbstractHttpGitClient):
"""HTTP Git client using urllib3 library.
Provides HTTP transport using the urllib3 library
with connection pooling and retry logic.
"""
def __init__(
self,
base_url: str,
config: Optional[object] = None,
pool_manager: Optional["urllib3.PoolManager"] = None,
**kwargs
) -> None:
"""
Initialize urllib3 HTTP client.
Args:
base_url: Base URL for Git repository
config: Git configuration object
pool_manager: Optional urllib3 PoolManager
"""
Utility functions for determining appropriate transport clients.
def get_transport_and_path(uri: str) -> Tuple[GitClient, str]:
"""
Get appropriate transport client and path for URI.
Args:
uri: Git repository URI
Returns:
Tuple of (client, path)
"""
def get_transport_and_path_from_url(
url: str,
config: Optional[object] = None,
**kwargs
) -> Tuple[GitClient, str]:
"""
Get transport client and path from URL with options.
Args:
url: Repository URL
config: Optional Git configuration
**kwargs: Additional client options
Returns:
Tuple of (client, path)
"""
def parse_rsync_url(url: str) -> Tuple[Optional[str], str, Optional[str]]:
"""
Parse rsync-style URL into components.
Args:
url: rsync-style URL (user@host:path)
Returns:
Tuple of (username, host, path)
"""
def default_local_git_client_cls() -> type[GitClient]:
"""
Get default local Git client class.
Returns:
Git client class for local operations
"""
def default_user_agent_string() -> str:
"""
Get default user agent string for HTTP clients.
Returns:
User agent string identifying Dulwich
"""
SSH implementation backends for SSH-based Git clients.
class SSHVendor:
"""Abstract SSH vendor interface.
Defines interface for different SSH implementations
used by SSH Git clients.
"""
def run_command(
self,
host: str,
command: str,
username: Optional[str] = None,
port: Optional[int] = None,
ssh_key: Optional[str] = None,
**kwargs
) -> "SubprocessWrapper":
"""
Run command on remote host via SSH.
Args:
host: Remote hostname
command: Command to execute
username: SSH username
port: SSH port number
ssh_key: Path to SSH private key
Returns:
SubprocessWrapper for the SSH connection
"""
class SubprocessSSHVendor(SSHVendor):
"""SSH vendor using subprocess to call ssh command.
Uses the system 'ssh' command to create SSH connections.
"""
def __init__(self, ssh_command: Optional[str] = None) -> None:
"""
Initialize subprocess SSH vendor.
Args:
ssh_command: Custom SSH command (default: 'ssh')
"""
class PLinkSSHVendor(SSHVendor):
"""SSH vendor using PuTTY's plink command.
Windows-specific SSH vendor that uses PuTTY's plink
command for SSH connections.
"""
def __init__(self, plink_command: Optional[str] = None) -> None:
"""
Initialize PLink SSH vendor.
Args:
plink_command: Custom plink command (default: 'plink')
"""
class SubprocessWrapper:
"""
Wrapper for subprocess providing Git protocol interface.
Manages stdin, stdout, stderr streams for Git protocol
communication over SSH or local subprocesses.
"""
def __init__(self, proc: subprocess.Popen) -> None:
"""
Initialize subprocess wrapper.
Args:
proc: Popen subprocess instance
"""
def can_read(self) -> bool:
"""
Check if data can be read from subprocess.
Returns:
True if data is available to read
"""
def write(self, data: bytes) -> int:
"""
Write data to subprocess stdin.
Args:
data: Data to write
Returns:
Number of bytes written
"""
def read(self, size: int = -1) -> bytes:
"""
Read data from subprocess stdout.
Args:
size: Number of bytes to read
Returns:
Data read from subprocess
"""
def close(self) -> None:
"""
Close subprocess and wait for termination.
"""
Data classes containing results from Git protocol operations.
class FetchPackResult:
"""Result from fetch_pack operation.
Contains information about refs and capabilities
returned by the remote during fetch.
"""
refs: Dict[bytes, bytes]
symrefs: Dict[bytes, bytes]
agent: Optional[bytes]
shallow: Optional[set[bytes]]
new_shallow: Optional[set[bytes]]
new_unshallow: Optional[set[bytes]]
def __init__(
self,
refs: Dict[bytes, bytes],
symrefs: Optional[Dict[bytes, bytes]] = None,
agent: Optional[bytes] = None,
shallow: Optional[set[bytes]] = None,
new_shallow: Optional[set[bytes]] = None,
new_unshallow: Optional[set[bytes]] = None,
) -> None:
"""
Initialize fetch pack result.
Args:
refs: Dictionary of references
symrefs: Dictionary of symbolic references
agent: Remote agent string
shallow: Set of shallow commit SHAs
new_shallow: Set of newly shallow commits
new_unshallow: Set of newly unshallow commits
"""
class SendPackResult:
"""Result from send_pack operation.
Contains status of reference updates sent to remote.
"""
ref_status: Dict[bytes, str]
agent: Optional[bytes]
pack_sent: bool
def __init__(
self,
ref_status: Dict[bytes, str],
agent: Optional[bytes] = None,
pack_sent: bool = False,
) -> None:
"""
Initialize send pack result.
Args:
ref_status: Status of each reference update
agent: Remote agent string
pack_sent: Whether pack data was sent
"""
class LsRemoteResult:
"""Result from ls_remote operation.
Contains references and symbolic references from remote.
"""
refs: Dict[bytes, bytes]
symrefs: Dict[bytes, bytes]
agent: Optional[bytes]
def __init__(
self,
refs: Dict[bytes, bytes],
symrefs: Optional[Dict[bytes, bytes]] = None,
agent: Optional[bytes] = None,
) -> None:
"""
Initialize ls-remote result.
Args:
refs: Dictionary of references
symrefs: Dictionary of symbolic references
agent: Remote agent string
"""
class ReportStatusParser:
"""Parser for Git report-status capability responses.
Parses status reports from remote during push operations
to determine success/failure of reference updates.
"""
def __init__(self) -> None:
"""
Initialize report status parser.
"""
def check(self) -> Dict[bytes, str]:
"""
Check status reports and return results.
Returns:
Dictionary mapping refs to status messages
"""
Functions for validating protocol requests and responses.
def check_wants(wants: List[bytes], refs: Dict[bytes, bytes]) -> None:
"""
Validate that wanted objects exist in refs.
Args:
wants: List of wanted object SHAs
refs: Available references
Raises:
InvalidWants: If wanted objects are not available
"""
def _fileno_can_read(fileno: int) -> bool:
"""
Check if file descriptor is ready for reading.
Args:
fileno: File descriptor number
Returns:
True if data can be read without blocking
"""
def _win32_peek_avail(handle: int) -> int:
"""
Check available bytes in Windows named pipe.
Args:
handle: Windows pipe handle
Returns:
Number of bytes available to read
"""
class InvalidWants(Exception):
"""Exception raised when client wants unavailable objects.
Occurs when fetch operation requests objects that don't
exist in the remote repository.
"""
def __init__(self, wants: List[bytes]) -> None:
"""
Initialize exception.
Args:
wants: List of unavailable object SHAs
"""
class HTTPUnauthorized(Exception):
"""Exception raised for HTTP authentication failures.
Occurs when HTTP Git operations fail due to insufficient
authentication credentials.
"""
def __init__(self, www_authenticate: str, url: str) -> None:
"""
Initialize exception.
Args:
www_authenticate: WWW-Authenticate header value
url: URL that failed authentication
"""
class HTTPProxyUnauthorized(Exception):
"""Exception raised for HTTP proxy authentication failures.
Occurs when HTTP proxy requires authentication that
was not provided or was invalid.
"""
def __init__(self, proxy_authenticate: str, url: str) -> None:
"""
Initialize exception.
Args:
proxy_authenticate: Proxy-Authenticate header value
url: URL that failed proxy authentication
"""
class StrangeHostname(Exception):
"""Exception raised for malformed hostnames.
Occurs when hostname in Git URL cannot be parsed
or contains invalid characters.
"""
from dulwich.client import (
get_transport_and_path,
get_transport_and_path_from_url,
InvalidWants,
HTTPUnauthorized
)
from dulwich.repo import Repo
# Get appropriate client for URL
client, path = get_transport_and_path('https://github.com/user/repo.git')
# Fetch references
try:
refs = client.get_refs(path)
for ref_name, sha in refs.items():
print(f"{ref_name.decode()}: {sha.decode()}")
except HTTPUnauthorized as e:
print(f"Authentication failed for {e.url}")
except InvalidWants as e:
print(f"Invalid objects requested: {e}")
finally:
client.close()
# Advanced client usage with configuration
repo = Repo('/path/to/repo')
config = repo.get_config()
client, path = get_transport_and_path_from_url(
'https://github.com/user/repo.git',
config=config
)
# Fetch pack with progress
def progress_callback(message):
print(f"Progress: {message.decode()}")
def determine_wants(refs, depth=None):
# Return list of SHAs to fetch
return [refs[b'refs/heads/main']]
def graph_walker_func():
# Return graph walker for the repository
return repo.get_graph_walker()
def pack_data_callback(data):
# Process incoming pack data
pass
result = client.fetch_pack(
path,
determine_wants,
graph_walker_func(),
pack_data_callback,
progress=progress_callback
)
print(f"Fetched {len(result.refs)} references")
print(f"Remote agent: {result.agent}")
client.close()
from dulwich.client import (
SSHGitClient,
SubprocessSSHVendor,
PLinkSSHVendor
)
import sys
# Create SSH client with custom vendor
if sys.platform == 'win32':
ssh_vendor = PLinkSSHVendor()
else:
ssh_vendor = SubprocessSSHVendor()
client = SSHGitClient(
'git.example.com',
username='git',
port=2222,
vendor=ssh_vendor
)
# Use client for operations
try:
refs = client.get_refs('/path/to/repo.git')
for ref_name, sha in refs.items():
print(f"{ref_name.decode()}: {sha.decode()}")
# Clone repository using SSH
def determine_wants(refs, depth=None):
return list(refs.values())
# Fetch all references
result = client.fetch_pack(
'/path/to/repo.git',
determine_wants,
None, # graph_walker
lambda data: None, # pack_data
)
print(f"SSH fetch completed: {len(result.refs)} refs")
finally:
client.close()
from dulwich.client import Urllib3HttpGitClient, HTTPUnauthorized
import urllib3
# Create HTTP client with custom pool manager
pool_manager = urllib3.PoolManager(
cert_reqs='CERT_REQUIRED',
ca_certs='/path/to/ca-certificates.crt'
)
client = Urllib3HttpGitClient(
'https://github.com/user/repo.git',
pool_manager=pool_manager
)
# Configure authentication if needed
# HTTP clients support various authentication methods:
# - Basic auth via URL: https://user:pass@github.com/user/repo.git
# - Token auth via environment or config
try:
# Fetch references
refs = client.get_refs('')
for ref_name, sha in refs.items():
print(f"{ref_name.decode()}: {sha.decode()}")
# Smart HTTP operations
def determine_wants(refs, depth=None):
# Only fetch main branch
if b'refs/heads/main' in refs:
return [refs[b'refs/heads/main']]
return []
result = client.fetch_pack(
'', # Empty path for base URL
determine_wants,
None, # graph_walker
lambda data: print(f"Received {len(data)} bytes"),
)
print(f"HTTP fetch completed")
print(f"Agent: {result.agent}")
except HTTPUnauthorized as e:
print(f"Authentication required: {e.www_authenticate}")
print(f"Failed URL: {e.url}")
finally:
client.close()
# Bundle client example
from dulwich.client import BundleClient
# Read from bundle file
with open('backup.bundle', 'rb') as bundle_file:
bundle_client = BundleClient(bundle_file)
refs = bundle_client.get_refs('')
print(f"Bundle contains {len(refs)} references")
Install with Tessl CLI
npx tessl i tessl/pypi-dulwich