CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-dulwich

Pure Python implementation of the Git version control system providing comprehensive access to Git repositories without requiring the Git command-line tool

Pending
Overview
Eval results
Files

docs/clients.md

Git Protocol Clients

Client implementations for communicating with Git repositories over HTTP, SSH, and the native Git (TCP) protocol, as well as through a local subprocess, the local filesystem, or bundle files, with authentication and progress tracking.

Capabilities

Base Client Classes

Abstract base classes and common functionality for Git protocol clients.

class GitClient:
    """Abstract base class for Git protocol clients.

    Declares the operations every client on this page shares: fetching a
    pack, sending a pack, listing remote refs, retrieving an archive, and
    closing the client.
    """
    
    def fetch_pack(
        self,
        path: str,
        determine_wants: Callable,
        graph_walker: object,
        pack_data: Callable,
        progress: Optional[Callable] = None,
        depth: Optional[int] = None
    ) -> FetchPackResult:
        """
        Fetch a pack from the remote repository.
        
        Args:
            path: Repository path on remote
            determine_wants: Callable that receives the remote refs (the
                usage examples also accept a ``depth`` keyword) and returns
                the list of SHAs to fetch
            graph_walker: Object graph walker; the usage examples obtain one
                from ``repo.get_graph_walker()``
            pack_data: Callback invoked with chunks of incoming pack data
            progress: Optional callback invoked with progress messages
                (the usage examples decode them, i.e. they arrive as bytes)
            depth: Optional shallow clone depth
            
        Returns:
            FetchPackResult with refs and symrefs
        """
    
    def send_pack(
        self,
        path: str,
        determine_wants: Callable,
        generate_pack_data: Callable,
        progress: Optional[Callable] = None
    ) -> SendPackResult:
        """
        Send a pack to the remote repository.
        
        Args:
            path: Repository path on remote
            determine_wants: Function to determine wanted objects.
                NOTE(review): upstream dulwich appears to name this
                parameter ``update_refs`` (a callable deciding the ref
                changes to push) -- confirm against the installed version
                before passing it by keyword.
            generate_pack_data: Function to generate pack data
            progress: Optional progress callback
            
        Returns:
            SendPackResult with reference update status
        """
    
    def get_refs(self, path: str) -> Dict[bytes, bytes]:
        """
        Get references from remote repository.
        
        Args:
            path: Repository path on remote
            
        Returns:
            Dictionary mapping ref names to SHAs (both as bytes)
        """
    
    def archive(
        self,
        path: str,
        committish: Optional[str] = None,
        write_data: Optional[Callable] = None,
        progress: Optional[Callable] = None,
        write_error: Optional[Callable] = None,
    ) -> None:
        """
        Retrieve an archive from remote repository.

        Nothing is returned; the archive is delivered through the
        ``write_data`` callback.
        
        Args:
            path: Repository path on remote
            committish: Commit-ish to archive
            write_data: Function to write archive data
            progress: Optional progress callback
            write_error: Function to write error messages
        """
    
    def close(self) -> None:
        """
        Close the client and clean up resources.
        """

class TraditionalGitClient(GitClient):
    """Traditional Git protocol client implementation.
    
    Provides common functionality for Git protocol clients
    that use traditional Git wire protocol. TCPGitClient,
    SubprocessGitClient and SSHGitClient derive from this class.
    """
    
    def __init__(
        self,
        can_read: Optional[Callable] = None,
        read: Optional[Callable] = None,
        write: Optional[Callable] = None,
        **kwargs
    ) -> None:
        """
        Initialize traditional Git client.
        
        Args:
            can_read: Function to check if data can be read
            read: Function to read data
            write: Function to write data
            **kwargs: Additional keyword arguments; not described here --
                presumably forwarded to the GitClient initializer (confirm)
        """

Protocol-Specific Clients

Client implementations for different network protocols.

class TCPGitClient(TraditionalGitClient):
    """TCP-based Git protocol client.
    
    Connects to Git daemon over TCP socket on specified
    host and port.
    """
    
    def __init__(
        self,
        host: str,
        port: Optional[int] = None,
        **kwargs
    ) -> None:
        """
        Initialize TCP Git client.
        
        Args:
            host: Git server hostname
            port: Git server port; None selects the default (9418)
            **kwargs: Additional keyword arguments; not described here --
                presumably forwarded to TraditionalGitClient (confirm)
        """

class SubprocessGitClient(TraditionalGitClient):
    """Subprocess-based Git client using local git command.
    
    Executes local git commands in subprocess to communicate
    with remote repositories.
    """
    
    def __init__(self, **kwargs) -> None:
        """
        Initialize subprocess Git client.

        Args:
            **kwargs: Keyword arguments; not described here -- presumably
                forwarded to TraditionalGitClient (confirm)
        """

class LocalGitClient(GitClient):
    """Local filesystem Git client.
    
    Accesses Git repositories directly on the local filesystem
    without network communication. Unlike the TCP/SSH/subprocess
    clients it derives from GitClient directly, not from
    TraditionalGitClient.
    """
    
    def __init__(self, **kwargs) -> None:
        """
        Initialize local Git client.

        Args:
            **kwargs: Keyword arguments; not described here -- presumably
                forwarded to the GitClient initializer (confirm)
        """

class SSHGitClient(TraditionalGitClient):
    """SSH-based Git protocol client.
    
    Connects to Git repositories over SSH using configurable
    SSH vendor implementations.
    """
    
    def __init__(
        self,
        host: str,
        port: Optional[int] = None,
        username: Optional[str] = None,
        vendor: Optional["SSHVendor"] = None,
        **kwargs
    ) -> None:
        """
        Initialize SSH Git client.
        
        Args:
            host: SSH server hostname
            port: SSH server port; None selects the default (22)
            username: SSH username
            vendor: SSH vendor implementation (an SSHVendor instance such
                as SubprocessSSHVendor or PLinkSSHVendor); which vendor is
                used when None is not stated here
            **kwargs: Additional keyword arguments; not described here --
                presumably forwarded to TraditionalGitClient (confirm)
        """

class BundleClient(GitClient):
    """Git bundle client for reading/writing Git bundles.
    
    Handles Git bundle format files that contain packaged
    Git objects and references.
    """
    
    def __init__(self, bundle_file: Union[str, IO]) -> None:
        """
        Initialize bundle client.
        
        Args:
            bundle_file: Path to bundle file or file-like object (the usage
                example passes a file opened in binary mode, ``'rb'``)
        """

HTTP Clients

HTTP-based Git protocol clients with authentication support.

class AbstractHttpGitClient(GitClient):
    """Abstract HTTP Git protocol client.
    
    Base class for HTTP-based Git protocol implementations
    supporting smart HTTP protocol. Urllib3HttpGitClient is the
    concrete subclass documented on this page.
    """
    
    def __init__(
        self,
        base_url: str,
        dumb: Optional[bool] = None,
        **kwargs
    ) -> None:
        """
        Initialize HTTP Git client.
        
        Args:
            base_url: Base URL for Git repository
            dumb: Force dumb HTTP protocol (None presumably leaves the
                choice to protocol detection -- confirm)
            **kwargs: Additional keyword arguments; not described here --
                presumably forwarded to the GitClient initializer (confirm)
        """
    
    def _http_request(
        self,
        url: str,
        headers: Optional[Dict[str, str]] = None,
        data: Optional[bytes] = None
    ) -> object:
        """
        Make HTTP request.

        Private transport hook (leading underscore); not part of the
        public client API.
        
        Args:
            url: Request URL
            headers: Optional HTTP headers
            data: Optional request body data
            
        Returns:
            HTTP response object
        """
    
    def _discover_references(
        self,
        service: str,
        base_url: str
    ) -> tuple[Dict[bytes, bytes], Dict[bytes, bytes]]:
        """
        Discover references from HTTP endpoint.

        Private helper (leading underscore); not part of the public
        client API.
        
        Args:
            service: Git service name
            base_url: Repository base URL
            
        Returns:
            Tuple of (refs, symrefs), each a bytes-to-bytes dictionary
        """

class Urllib3HttpGitClient(AbstractHttpGitClient):
    """HTTP Git client using urllib3 library.
    
    Provides HTTP transport using the urllib3 library
    with connection pooling and retry logic.
    """
    
    def __init__(
        self,
        base_url: str,
        config: Optional[object] = None,
        pool_manager: Optional["urllib3.PoolManager"] = None,
        **kwargs
    ) -> None:
        """
        Initialize urllib3 HTTP client.
        
        Args:
            base_url: Base URL for Git repository
            config: Git configuration object
            pool_manager: Optional urllib3 PoolManager; pass one to control
                TLS settings such as ``cert_reqs`` / ``ca_certs`` as the
                usage example does
            **kwargs: Additional keyword arguments; not described here --
                presumably forwarded to AbstractHttpGitClient (confirm)
        """

Transport Functions

Utility functions for determining appropriate transport clients.

def get_transport_and_path(uri: str) -> Tuple[GitClient, str]:
    """
    Get appropriate transport client and path for URI.
    
    Args:
        uri: Git repository URI (the usage example passes an
            ``https://`` URL)
        
    Returns:
        Tuple of (client, path); ``path`` is what the client's methods
        (e.g. ``get_refs``) expect as their ``path`` argument
    """

def get_transport_and_path_from_url(
    url: str,
    config: Optional[object] = None,
    **kwargs
) -> Tuple[GitClient, str]:
    """
    Get transport client and path from URL with options.
    
    Args:
        url: Repository URL
        config: Optional Git configuration (the usage example passes the
            result of ``repo.get_config()``)
        **kwargs: Additional client options
        
    Returns:
        Tuple of (client, path)
    """

def parse_rsync_url(url: str) -> Tuple[Optional[str], str, Optional[str]]:
    """
    Parse rsync-style URL into components.
    
    Args:
        url: rsync-style URL (user@host:path)
        
    Returns:
        Tuple of (username, host, path); username and path are typed
        Optional, so either may be None
    """

def default_local_git_client_cls() -> type[GitClient]:
    """
    Get default local Git client class.
    
    Returns:
        Git client class (the class itself, not an instance) for local
        operations
    """

def default_user_agent_string() -> str:
    """
    Get default user agent string for HTTP clients.
    
    Returns:
        User agent string (str, not bytes) identifying Dulwich
    """

SSH Vendor Classes

SSH implementation backends for SSH-based Git clients.

class SSHVendor:
    """Abstract SSH vendor interface.
    
    Defines interface for different SSH implementations
    used by SSH Git clients. SubprocessSSHVendor and PLinkSSHVendor
    are the implementations documented on this page.
    """
    
    def run_command(
        self,
        host: str,
        command: str,
        username: Optional[str] = None,
        port: Optional[int] = None,
        ssh_key: Optional[str] = None,
        **kwargs
    ) -> "SubprocessWrapper":
        """
        Run command on remote host via SSH.
        
        Args:
            host: Remote hostname
            command: Command to execute
            username: SSH username
            port: SSH port number
            ssh_key: Path to SSH private key
            **kwargs: Additional vendor-specific options; not described
                here
            
        Returns:
            SubprocessWrapper for the SSH connection
        """

class SubprocessSSHVendor(SSHVendor):
    """SSH vendor using subprocess to call ssh command.
    
    Uses the system 'ssh' command to create SSH connections.
    """
    
    def __init__(self, ssh_command: Optional[str] = None) -> None:
        """
        Initialize subprocess SSH vendor.
        
        Args:
            ssh_command: Custom SSH command; None selects the default
                ('ssh')
        """

class PLinkSSHVendor(SSHVendor):
    """SSH vendor using PuTTY's plink command.
    
    Windows-specific SSH vendor that uses PuTTY's plink
    command for SSH connections.
    """
    
    def __init__(self, plink_command: Optional[str] = None) -> None:
        """
        Initialize PLink SSH vendor.
        
        Args:
            plink_command: Custom plink command; None selects the default
                ('plink')
        """

class SubprocessWrapper:
    """
    Wrapper for subprocess providing Git protocol interface.
    
    Manages stdin, stdout, stderr streams for Git protocol
    communication over SSH or local subprocesses. This is the type
    returned by SSHVendor.run_command.
    """
    
    def __init__(self, proc: subprocess.Popen) -> None:
        """
        Initialize subprocess wrapper.
        
        Args:
            proc: Popen subprocess instance
        """
    
    def can_read(self) -> bool:
        """
        Check if data can be read from subprocess.
        
        Returns:
            True if data is available to read
        """
    
    def write(self, data: bytes) -> int:
        """
        Write data to subprocess stdin.
        
        Args:
            data: Data to write
            
        Returns:
            Number of bytes written
        """
    
    def read(self, size: int = -1) -> bytes:
        """
        Read data from subprocess stdout.
        
        Args:
            size: Number of bytes to read (default -1; presumably "read
                until EOF" as with file objects -- confirm)
            
        Returns:
            Data read from subprocess
        """
    
    def close(self) -> None:
        """
        Close subprocess and wait for termination.
        """

Result Classes

Data classes containing results from Git protocol operations.

class FetchPackResult:
    """Result from fetch_pack operation.
    
    Contains the refs, symbolic refs, agent string and shallow-commit
    information returned by the remote during fetch.
    """
    
    # Dictionary of references
    refs: Dict[bytes, bytes]
    # Dictionary of symbolic references
    symrefs: Dict[bytes, bytes]
    # Remote agent string, if any
    agent: Optional[bytes]
    # Set of shallow commit SHAs
    shallow: Optional[set[bytes]]
    # Set of newly shallow commits
    new_shallow: Optional[set[bytes]]
    # Set of newly unshallow commits
    new_unshallow: Optional[set[bytes]]
    
    def __init__(
        self,
        refs: Dict[bytes, bytes],
        symrefs: Optional[Dict[bytes, bytes]] = None,
        agent: Optional[bytes] = None,
        shallow: Optional[set[bytes]] = None,
        new_shallow: Optional[set[bytes]] = None,
        new_unshallow: Optional[set[bytes]] = None,
    ) -> None:
        """
        Initialize fetch pack result.
        
        Args:
            refs: Dictionary of references
            symrefs: Dictionary of symbolic references (the attribute is
                typed as a non-optional dict, so None is presumably
                replaced by an empty dict -- confirm)
            agent: Remote agent string
            shallow: Set of shallow commit SHAs
            new_shallow: Set of newly shallow commits
            new_unshallow: Set of newly unshallow commits
        """

class SendPackResult:
    """Result from send_pack operation.
    
    Contains status of reference updates sent to remote.
    """
    
    # Status of each reference update, keyed by ref name
    ref_status: Dict[bytes, str]
    # Remote agent string, if any
    agent: Optional[bytes]
    # Whether pack data was sent
    pack_sent: bool
    
    def __init__(
        self,
        ref_status: Dict[bytes, str],
        agent: Optional[bytes] = None,
        pack_sent: bool = False,
    ) -> None:
        """
        Initialize send pack result.
        
        Args:
            ref_status: Status of each reference update
            agent: Remote agent string
            pack_sent: Whether pack data was sent (default False)
        """

class LsRemoteResult:
    """Result from ls_remote operation.
    
    Contains references and symbolic references from remote,
    plus the remote agent string when available.
    """
    
    # Dictionary of references
    refs: Dict[bytes, bytes]
    # Dictionary of symbolic references
    symrefs: Dict[bytes, bytes]
    # Remote agent string, if any
    agent: Optional[bytes]
    
    def __init__(
        self,
        refs: Dict[bytes, bytes],
        symrefs: Optional[Dict[bytes, bytes]] = None,
        agent: Optional[bytes] = None,
    ) -> None:
        """
        Initialize ls-remote result.
        
        Args:
            refs: Dictionary of references
            symrefs: Dictionary of symbolic references (the attribute is
                typed as a non-optional dict, so None is presumably
                replaced by an empty dict -- confirm)
            agent: Remote agent string
        """

class ReportStatusParser:
    """Parser for Git report-status capability responses.
    
    Parses status reports from remote during push operations
    to determine success/failure of reference updates.
    """
    
    def __init__(self) -> None:
        """
        Initialize report status parser.
        """
    
    def check(self) -> Dict[bytes, str]:
        """
        Check status reports and return results.
        
        Returns:
            Dictionary mapping refs (bytes) to status messages (str)
        """

Validation Functions

Functions for validating protocol requests and responses.

def check_wants(wants: List[bytes], refs: Dict[bytes, bytes]) -> None:
    """
    Validate that wanted objects exist in refs.

    Returns nothing; failure is signalled by raising.
    
    Args:
        wants: List of wanted object SHAs
        refs: Available references
        
    Raises:
        InvalidWants: If wanted objects are not available
    """

def _fileno_can_read(fileno: int) -> bool:
    """
    Check if file descriptor is ready for reading.

    Private helper (leading underscore); not part of the public API.
    
    Args:
        fileno: File descriptor number
        
    Returns:
        True if data can be read without blocking
    """

def _win32_peek_avail(handle: int) -> int:
    """
    Check available bytes in Windows named pipe.

    Private, Windows-only helper (leading underscore); not part of the
    public API.
    
    Args:
        handle: Windows pipe handle
        
    Returns:
        Number of bytes available to read
    """

Exception Classes

Exceptions raised by Git protocol clients and validation functions.

class InvalidWants(Exception):
    """Exception raised when client wants unavailable objects.
    
    Occurs when fetch operation requests objects that don't
    exist in the remote repository. check_wants documents this
    as the exception it raises.
    """
    
    def __init__(self, wants: List[bytes]) -> None:
        """
        Initialize exception.
        
        Args:
            wants: List of unavailable object SHAs
        """

class HTTPUnauthorized(Exception):
    """Exception raised for HTTP authentication failures.
    
    Occurs when HTTP Git operations fail due to insufficient
    authentication credentials. The usage examples read the
    constructor arguments back as the ``www_authenticate`` and
    ``url`` attributes.
    """
    
    def __init__(self, www_authenticate: str, url: str) -> None:
        """
        Initialize exception.
        
        Args:
            www_authenticate: WWW-Authenticate header value
            url: URL that failed authentication
        """

class HTTPProxyUnauthorized(Exception):
    """Exception raised for HTTP proxy authentication failures.
    
    Occurs when HTTP proxy requires authentication that
    was not provided or was invalid. Its constructor parallels
    HTTPUnauthorized, with the Proxy-Authenticate header in place of
    WWW-Authenticate.
    """
    
    def __init__(self, proxy_authenticate: str, url: str) -> None:
        """
        Initialize exception.
        
        Args:
            proxy_authenticate: Proxy-Authenticate header value
            url: URL that failed proxy authentication
        """

class StrangeHostname(Exception):
    """Exception raised for malformed hostnames.
    
    Occurs when hostname in Git URL cannot be parsed
    or contains invalid characters. No custom constructor is
    documented, so it is constructed like a plain Exception.
    """

Usage Examples

Basic Client Usage

from dulwich.client import (
    get_transport_and_path,
    get_transport_and_path_from_url,
    InvalidWants,
    HTTPUnauthorized,
)
from dulwich.repo import Repo

# Get appropriate client for URL
client, path = get_transport_and_path('https://github.com/user/repo.git')

# Fetch references
try:
    refs = client.get_refs(path)
    for ref_name, sha in refs.items():
        # Ref values are already hex-encoded ASCII SHAs (bytes), so decode
        # them; bytes.hex() would hex-encode the hex digits a second time.
        print(f"{ref_name.decode()}: {sha.decode('ascii')}")
except HTTPUnauthorized as e:
    print(f"Authentication failed for {e.url}")
except InvalidWants as e:
    print(f"Invalid objects requested: {e}")
finally:
    client.close()

# Advanced client usage with configuration
repo = Repo('/path/to/repo')
config = repo.get_config()
client, path = get_transport_and_path_from_url(
    'https://github.com/user/repo.git',
    config=config
)

# Fetch pack with progress
def progress_callback(message):
    # Progress messages arrive as bytes
    print(f"Progress: {message.decode()}")

def determine_wants(refs, depth=None):
    # Return list of SHAs to fetch; fetch nothing if the remote has no
    # 'main' branch instead of failing with KeyError
    if b'refs/heads/main' in refs:
        return [refs[b'refs/heads/main']]
    return []

def graph_walker_func():
    # Return graph walker for the repository
    return repo.get_graph_walker()

def pack_data_callback(data):
    # Process incoming pack data
    pass

# Close the client even if the fetch fails, as in the first part
try:
    result = client.fetch_pack(
        path,
        determine_wants,
        graph_walker_func(),
        pack_data_callback,
        progress=progress_callback
    )

    print(f"Fetched {len(result.refs)} references")
    print(f"Remote agent: {result.agent}")
finally:
    client.close()

Custom SSH Configuration

from dulwich.client import (
    SSHGitClient,
    SubprocessSSHVendor,
    PLinkSSHVendor,
)
import sys

# Create SSH client with custom vendor
if sys.platform == 'win32':
    # plink is PuTTY's command-line SSH tool (Windows)
    ssh_vendor = PLinkSSHVendor()
else:
    # Use the system 'ssh' command everywhere else
    ssh_vendor = SubprocessSSHVendor()

client = SSHGitClient(
    'git.example.com',
    username='git',
    port=2222,
    vendor=ssh_vendor
)

# Use client for operations
try:
    refs = client.get_refs('/path/to/repo.git')
    for ref_name, sha in refs.items():
        # Ref values are already hex-encoded ASCII SHAs (bytes), so decode
        # them; bytes.hex() would hex-encode the hex digits a second time.
        print(f"{ref_name.decode()}: {sha.decode('ascii')}")

    # Clone repository using SSH
    def determine_wants(refs, depth=None):
        return list(refs.values())

    # Fetch all references
    # NOTE(review): fetch_pack is documented as taking a graph walker
    # object; passing None will probably fail during negotiation once
    # determine_wants returns any SHA. Confirm, and pass a real walker
    # (e.g. repo.get_graph_walker()) if so.
    result = client.fetch_pack(
        '/path/to/repo.git',
        determine_wants,
        None,  # graph_walker
        lambda data: None,  # pack_data
    )
    print(f"SSH fetch completed: {len(result.refs)} refs")

finally:
    client.close()

HTTP Client with Authentication

from dulwich.client import Urllib3HttpGitClient, HTTPUnauthorized
import urllib3

# Create HTTP client with custom pool manager
pool_manager = urllib3.PoolManager(
    cert_reqs='CERT_REQUIRED',
    ca_certs='/path/to/ca-certificates.crt'
)

client = Urllib3HttpGitClient(
    'https://github.com/user/repo.git',
    pool_manager=pool_manager
)

# Configure authentication if needed
# HTTP clients support various authentication methods:
# - Basic auth via URL: https://user:pass@github.com/user/repo.git
# - Token auth via environment or config

try:
    # Fetch references
    refs = client.get_refs('')
    for ref_name, sha in refs.items():
        # Ref values are already hex-encoded ASCII SHAs (bytes), so decode
        # them; bytes.hex() would hex-encode the hex digits a second time.
        print(f"{ref_name.decode()}: {sha.decode('ascii')}")

    # Smart HTTP operations
    def determine_wants(refs, depth=None):
        # Only fetch main branch
        if b'refs/heads/main' in refs:
            return [refs[b'refs/heads/main']]
        return []

    # NOTE(review): fetch_pack is documented as taking a graph walker
    # object; passing None will probably fail during negotiation once
    # determine_wants returns any SHA. Confirm, and pass a real walker
    # (e.g. repo.get_graph_walker()) if so.
    result = client.fetch_pack(
        '',  # Empty path for base URL
        determine_wants,
        None,  # graph_walker
        lambda data: print(f"Received {len(data)} bytes"),
    )

    print("HTTP fetch completed")
    print(f"Agent: {result.agent}")

except HTTPUnauthorized as e:
    print(f"Authentication required: {e.www_authenticate}")
    print(f"Failed URL: {e.url}")
finally:
    client.close()

# Bundle client example
from dulwich.client import BundleClient

# Read from bundle file
with open('backup.bundle', 'rb') as bundle_file:
    bundle_client = BundleClient(bundle_file)
    refs = bundle_client.get_refs('')
    print(f"Bundle contains {len(refs)} references")

Install with Tessl CLI

npx tessl i tessl/pypi-dulwich

docs

cli.md

clients.md

configuration.md

diff-merge.md

index-management.md

index.md

object-storage.md

objects.md

pack-files.md

porcelain.md

references.md

repository.md

tile.json