CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-cloudpathlib

Pathlib-style classes for cloud storage services that provide seamless access to AWS S3, Google Cloud Storage, and Azure Blob Storage with familiar filesystem operations.

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/cloud-operations.md

Cloud-Specific Operations

Advanced cloud storage features that go beyond traditional filesystem operations. These capabilities leverage cloud-native features like URL generation, presigned URLs, efficient copying, upload/download operations, caching management, and cloud service metadata access.

Capabilities

File Upload and Download

Transfer files between local filesystem and cloud storage.

def download_to(
    self,
    destination: typing.Union[str, "os.PathLike"]
) -> "pathlib.Path":
    """
    Download this cloud file to a local destination.

    Args:
        destination: Local path for the downloaded file. If it names an
            existing directory, the file is presumably placed inside it
            under its cloud name -- confirm against the implementation.

    Returns:
        pathlib.Path object pointing at the downloaded local file.
    """

def upload_from(
    self,
    source: typing.Union[str, "os.PathLike"],
    **kwargs
) -> "CloudPath":
    """
    Upload a local file to this cloud storage path.

    Args:
        source: Local file path to upload.
        **kwargs: Cloud-specific upload options; which keywords are
            accepted varies by provider backend.

    Returns:
        CloudPath object for the uploaded file.
    """

File and Directory Copying

Copy files and directories within and between cloud storage locations.

def copy(
    self,
    destination: typing.Union[str, "CloudPath"],
    **kwargs
) -> "CloudPath":
    """
    Copy this file to the given destination.

    The destination may be on the same or a different cloud provider
    (cross-provider copies go through the local cache per the examples
    below).

    Args:
        destination: Target path for the copy.
        **kwargs: Cloud-specific copy options; availability varies by
            provider.

    Returns:
        CloudPath object for the copied file.
    """

def copytree(
    self,
    destination: typing.Union[str, "CloudPath"],
    **kwargs
) -> "CloudPath":
    """
    Recursively copy this directory tree to the destination.

    Args:
        destination: Target path that becomes the root of the copied
            tree.
        **kwargs: Cloud-specific copy options; availability varies by
            provider.

    Returns:
        CloudPath object for the copied directory.
    """

URL Generation

Generate URLs for cloud resources with optional presigning.

def as_url(
    self,
    presign: bool = False,
    expire_seconds: int = 3600
) -> str:
    """
    Get a URL for this cloud resource.

    Args:
        presign: If True, generate a presigned URL granting temporary
            access to the object.
        expire_seconds: Expiration time for presigned URLs; only
            meaningful when presign=True (default 3600 = 1 hour).

    Returns:
        URL string for the cloud resource. Without presigning the URL
        is only usable if the object itself is publicly accessible.
    """

Cache Management

Control local file caching behavior.

def clear_cache(self) -> None:
    """
    Clear the local cache for this path.

    Removes any locally cached copy of the file; the next access via
    ``fspath`` re-downloads it.
    """

@property
def fspath(self) -> str:
    """
    Local filesystem path of the cached copy of this file.

    Downloads and caches the file if it is not already cached.
    The name matches the ``os.PathLike`` protocol method, so presumably
    ``os.fspath()`` / ``open()`` interop is intended -- confirm against
    the implementation.

    Returns:
        Local path string to the cached file.
    """

File Movement and Deletion

Move and delete cloud files and directories.

def unlink(self, missing_ok: bool = False) -> None:
    """
    Delete this file from cloud storage (mirrors pathlib.Path.unlink).

    Args:
        missing_ok: If True, do not raise when the file does not exist.

    Raises:
        CloudPathFileNotFoundError: File not found and missing_ok=False.
    """

def rename(self, target: typing.Union[str, "CloudPath"]) -> "CloudPath":
    """
    Rename/move this file to the target location.

    NOTE(review): on object stores a rename is presumably implemented
    as copy-then-delete rather than an atomic move -- confirm.

    Args:
        target: New path for the file.

    Returns:
        CloudPath object for the renamed file.
    """

def replace(self, target: typing.Union[str, "CloudPath"]) -> "CloudPath":
    """
    Move this file to ``target``, overwriting any existing object there.

    Args:
        target: Path to replace.

    Returns:
        CloudPath object for the replacement location.
    """

Usage Examples

File Upload and Download

# Example: round-trip a single file between the local filesystem and S3.
from cloudpathlib import CloudPath
import pathlib

# Local source file and the cloud destination it maps to.
local_file = pathlib.Path("local_data.csv")
cloud_path = CloudPath("s3://my-bucket/data.csv")

# Upload (requires credentials with write access to the bucket).
cloud_path.upload_from(local_file)
print(f"Uploaded to {cloud_path}")

# Download back; download_to returns a pathlib.Path to the local copy.
downloaded_path = cloud_path.download_to("downloaded_data.csv")
print(f"Downloaded to {downloaded_path}")

Directory Upload and Download

# Example: mirror a local directory tree to the cloud and back by
# walking with rglob and rebuilding relative paths manually.
local_dir = pathlib.Path("local_project/")
cloud_dir = CloudPath("s3://my-bucket/project/")

# Upload: each local file keeps its path relative to local_dir
# under the cloud prefix.
for local_path in local_dir.rglob("*"):
    if local_path.is_file():
        relative_path = local_path.relative_to(local_dir)
        cloud_file = cloud_dir / relative_path
        cloud_file.upload_from(local_path)

# Download: the same walk in reverse; local parent directories are
# created before each file is written.
for cloud_file in cloud_dir.rglob("*"):
    if cloud_file.is_file():
        relative_path = cloud_file.relative_to(cloud_dir)
        local_path = pathlib.Path("downloads") / relative_path
        local_path.parent.mkdir(parents=True, exist_ok=True)
        cloud_file.download_to(local_path)

Cloud-to-Cloud Copying

# Cross-provider copy: S3 to Google Cloud Storage. The bytes are
# staged through the local cache on the way across.
aws_source = CloudPath("s3://source-bucket/file.txt")
gcs_target = CloudPath("gs://dest-bucket/file.txt")
aws_source.copy(gcs_target)

# Same-provider copy: between two prefixes in one S3 bucket.
src = CloudPath("s3://my-bucket/source/file.txt")
dst = CloudPath("s3://my-bucket/backup/file.txt")
src.copy(dst)

Directory Copying

# Whole-tree copy in one call.
source_dir = CloudPath("s3://source-bucket/data/")
dest_dir = CloudPath("s3://backup-bucket/data-backup/")

# Copy directory tree
source_dir.copytree(dest_dir)

# Selective copy: walk the tree and copy only matching files.
source_dir = CloudPath("s3://my-bucket/project/")
backup_dir = CloudPath("s3://backup-bucket/project-backup/")

# Only copy specific file types.
# NOTE(review): rglob("*") also yields directory entries; the suffix
# filter skips them unless a directory name happens to end in one of
# these suffixes -- confirm if that edge case matters here.
for file_path in source_dir.rglob("*"):
    if file_path.suffix in ['.py', '.md', '.txt']:
        relative_path = file_path.relative_to(source_dir)
        dest_path = backup_dir / relative_path
        file_path.copy(dest_path)

URL Generation

# Plain URL: only usable if the object is publicly readable.
cloud_path = CloudPath("s3://public-bucket/image.jpg")
public_url = cloud_path.as_url()
print(f"Public URL: {public_url}")

# Presigned URL: grants temporary access to a private object.
private_path = CloudPath("s3://private-bucket/document.pdf")
signed_url = private_path.as_url(presign=True, expire_seconds=3600)
print(f"Temporary URL (1 hour): {signed_url}")

# Shorter-lived presigned URL.
temp_url = private_path.as_url(presign=True, expire_seconds=300)  # 5 minutes
print(f"Short-term URL: {temp_url}")

Cache Management

# Caching: fspath transparently downloads and caches the object.
cloud_path = CloudPath("s3://my-bucket/large-file.dat")

# Get local cached path (downloads on first access).
local_cache_path = cloud_path.fspath
print(f"Cached at: {local_cache_path}")

# The cached copy is an ordinary local file.
with open(local_cache_path, 'rb') as f:
    data = f.read(1024)

# Drop the local copy to free disk space.
cloud_path.clear_cache()

# Accessing fspath again re-downloads a fresh copy.
fresh_cache_path = cloud_path.fspath

File Movement and Deletion

# NOTE(review): CloudPathFileNotFoundError was not imported by any
# earlier example (only CloudPath is), so the except clause below would
# raise NameError as written. It is the exception this page's unlink()
# documentation names -- confirm the exact import path against the
# installed cloudpathlib version.
from cloudpathlib.exceptions import CloudPathFileNotFoundError

# Move/rename files
old_path = CloudPath("s3://my-bucket/old-name.txt")
new_path = CloudPath("s3://my-bucket/new-name.txt")

# Rename file
renamed = old_path.rename(new_path)
print(f"Renamed to: {renamed}")

# Move to different directory, overwriting any existing target
source = CloudPath("s3://my-bucket/temp/file.txt")
target = CloudPath("s3://my-bucket/archive/file.txt")
moved = source.replace(target)

# Delete files
file_to_delete = CloudPath("s3://my-bucket/unwanted.txt")
file_to_delete.unlink()

# Safe deletion: the file was already removed above, so this second
# unlink raises and demonstrates the error-handling path.
try:
    file_to_delete.unlink()
except CloudPathFileNotFoundError:
    print("File already deleted")

# Safe deletion with missing_ok: no exception even when absent
file_to_delete.unlink(missing_ok=True)

Batch Operations

# Batch upload: push every local CSV under one cloud prefix.
local_files = pathlib.Path("local_data/").glob("*.csv")
cloud_base = CloudPath("s3://my-bucket/csv-data/")

for local_file in local_files:
    cloud_file = cloud_base / local_file.name
    cloud_file.upload_from(local_file)
    print(f"Uploaded {local_file.name}")

# Batch download: pull every JSON result into a local directory.
cloud_files = CloudPath("s3://my-bucket/results/").glob("*.json")
local_base = pathlib.Path("downloads/")
local_base.mkdir(exist_ok=True)

for cloud_file in cloud_files:
    local_file = local_base / cloud_file.name
    cloud_file.download_to(local_file)
    print(f"Downloaded {cloud_file.name}")

Monitoring Transfer Progress

def upload_with_progress(local_path, cloud_path):
    """Upload a file, logging its size before and after.

    Note: this prints a size summary only; it does not report
    incremental transfer progress.
    """
    file_size = local_path.stat().st_size
    print(f"Uploading {local_path.name} ({file_size} bytes)")
    
    # Upload file
    result = cloud_path.upload_from(local_path)
    print(f"Upload complete: {result}")
    return result

def download_with_progress(cloud_path, local_path):
    """Download a file, logging the downloaded size afterwards.

    Note: like upload_with_progress, this is summary logging, not
    incremental progress reporting.
    """
    print(f"Downloading {cloud_path.name}")
    
    # Download file; download_to returns a pathlib.Path we can stat.
    result = cloud_path.download_to(local_path)
    downloaded_size = result.stat().st_size
    print(f"Downloaded {downloaded_size} bytes to {result}")
    return result

# Usage
local_file = pathlib.Path("data.csv")
cloud_file = CloudPath("s3://my-bucket/data.csv")

upload_with_progress(local_file, cloud_file)
download_with_progress(cloud_file, "downloaded.csv")

Working with Temporary URLs

# requests is a third-party dependency, not part of cloudpathlib.
import requests

# Generate a temporary download URL for a private object.
cloud_path = CloudPath("s3://private-bucket/report.pdf")
download_url = cloud_path.as_url(presign=True, expire_seconds=1800)  # 30 minutes

# Any HTTP client can use the presigned URL directly.
# NOTE(review): no status check here -- consider
# response.raise_for_status() before writing the body.
response = requests.get(download_url)
with open("downloaded_report.pdf", "wb") as f:
    f.write(response.content)

# Generate upload URL (for some cloud providers)
# NOTE(review): presigned URLs are typically bound to an HTTP method;
# a URL signed for GET may not accept uploads -- confirm whether
# as_url() can sign for PUT on the target provider.
upload_path = CloudPath("s3://uploads-bucket/new-file.txt")
upload_url = upload_path.as_url(presign=True, expire_seconds=600)  # 10 minutes

# External service can upload directly to this URL
print(f"Upload URL: {upload_url}")

Advanced Copy Operations

# Copy with metadata preservation
source = CloudPath("s3://source/important.doc")
destination = CloudPath("s3://backup/important.doc")

# Copy with cloud-specific options (varies by provider)
# NOTE(review): keyword options like preserve_metadata are forwarded as
# **kwargs; whether the provider backend accepts this one is not shown
# here -- confirm against the provider client's copy API.
source.copy(destination, preserve_metadata=True)

# Conditional copy: only overwrite when the source is newer.
# NOTE(review): exists()/stat() then copy() is not atomic; the remote
# object can change in between.
if not destination.exists() or source.stat().st_mtime > destination.stat().st_mtime:
    print("Source is newer, copying...")
    source.copy(destination)
else:
    print("Destination is up to date")

# Copy with different storage class (cloud-specific)
source.copy(destination, storage_class='GLACIER')  # S3 example

Install with Tessl CLI

npx tessl i tessl/pypi-cloudpathlib

docs

anypath.md

azure-integration.md

client-management.md

cloud-operations.md

configuration.md

core-operations.md

directory-operations.md

exceptions.md

file-io.md

gcs-integration.md

http-support.md

index.md

patching.md

s3-integration.md

tile.json