CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-cloudpathlib

Pathlib-style classes for cloud storage services that provide seamless access to AWS S3, Google Cloud Storage, and Azure Blob Storage with familiar filesystem operations.

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/cloud-operations.md

Cloud-Specific Operations

Advanced cloud storage features that go beyond traditional filesystem operations. These capabilities leverage cloud-native features like URL generation, presigned URLs, efficient copying, upload/download operations, caching management, and cloud service metadata access.

Capabilities

File Upload and Download

Transfer files between local filesystem and cloud storage.

def download_to(
    self,
    destination: typing.Union[str, "os.PathLike"]
) -> "pathlib.Path":
    """
    Download this cloud file to a local destination.

    Args:
        destination: Local path for the downloaded file. If it names an
            existing directory, the file is presumably placed inside it
            under its cloud name -- confirm against the implementation.

    Returns:
        pathlib.Path object pointing at the downloaded local file.
    """

def upload_from(
    self,
    source: typing.Union[str, "os.PathLike"],
    **kwargs
) -> "CloudPath":
    """
    Upload a local file to this cloud storage path.

    Args:
        source: Local file path to upload.
        **kwargs: Cloud-specific upload options; which keywords are
            accepted varies by provider backend.

    Returns:
        CloudPath object for the uploaded file.
    """

File and Directory Copying

Copy files and directories within and between cloud storage locations.

def copy(
    self,
    destination: typing.Union[str, "CloudPath"],
    **kwargs
) -> "CloudPath":
    """
    Copy this file to the given destination.

    The destination may be on the same or a different cloud provider
    (cross-provider copies go through the local cache per the examples
    below).

    Args:
        destination: Target path for the copy.
        **kwargs: Cloud-specific copy options; availability varies by
            provider.

    Returns:
        CloudPath object for the copied file.
    """

def copytree(
    self,
    destination: typing.Union[str, "CloudPath"],
    **kwargs
) -> "CloudPath":
    """
    Recursively copy this directory tree to the destination.

    Args:
        destination: Target path that becomes the root of the copied
            tree.
        **kwargs: Cloud-specific copy options; availability varies by
            provider.

    Returns:
        CloudPath object for the copied directory.
    """

URL Generation

Generate URLs for cloud resources with optional presigning.

def as_url(
    self,
    presign: bool = False,
    expire_seconds: int = 3600
) -> str:
    """
    Get a URL for this cloud resource.

    Args:
        presign: If True, generate a presigned URL granting temporary
            access to the object.
        expire_seconds: Expiration time for presigned URLs; only
            meaningful when presign=True (default 3600 = 1 hour).

    Returns:
        URL string for the cloud resource. Without presigning the URL
        is only usable if the object itself is publicly accessible.
    """

Cache Management

Control local file caching behavior.

def clear_cache(self) -> None:
    """
    Clear the local cache for this path.

    Removes any locally cached copy of the file; the next access via
    ``fspath`` re-downloads it.
    """

@property
def fspath(self) -> str:
    """
    Local filesystem path of the cached copy of this file.

    Downloads and caches the file if it is not already cached.
    The name matches the ``os.PathLike`` protocol method, so presumably
    ``os.fspath()`` / ``open()`` interop is intended -- confirm against
    the implementation.

    Returns:
        Local path string to the cached file.
    """

File Movement and Deletion

Move and delete cloud files and directories.

def unlink(self, missing_ok: bool = False) -> None:
    """
    Delete this file from cloud storage (mirrors pathlib.Path.unlink).

    Args:
        missing_ok: If True, do not raise when the file does not exist.

    Raises:
        CloudPathFileNotFoundError: File not found and missing_ok=False.
    """

def rename(self, target: typing.Union[str, "CloudPath"]) -> "CloudPath":
    """
    Rename/move this file to the target location.

    NOTE(review): on object stores a rename is presumably implemented
    as copy-then-delete rather than an atomic move -- confirm.

    Args:
        target: New path for the file.

    Returns:
        CloudPath object for the renamed file.
    """

def replace(self, target: typing.Union[str, "CloudPath"]) -> "CloudPath":
    """
    Move this file to ``target``, overwriting any existing object there.

    Args:
        target: Path to replace.

    Returns:
        CloudPath object for the replacement location.
    """

Usage Examples

File Upload and Download

# Example: round-trip a single file between the local filesystem and S3.
from cloudpathlib import CloudPath
import pathlib

# Local source file and the cloud destination it maps to.
local_file = pathlib.Path("local_data.csv")
cloud_path = CloudPath("s3://my-bucket/data.csv")

# Upload (requires credentials with write access to the bucket).
cloud_path.upload_from(local_file)
print(f"Uploaded to {cloud_path}")

# Download back; download_to returns a pathlib.Path to the local copy.
downloaded_path = cloud_path.download_to("downloaded_data.csv")
print(f"Downloaded to {downloaded_path}")

Directory Upload and Download

# Example: mirror a local directory tree to the cloud and back by
# walking with rglob and rebuilding relative paths manually.
local_dir = pathlib.Path("local_project/")
cloud_dir = CloudPath("s3://my-bucket/project/")

# Upload: each local file keeps its path relative to local_dir
# under the cloud prefix.
for local_path in local_dir.rglob("*"):
    if local_path.is_file():
        relative_path = local_path.relative_to(local_dir)
        cloud_file = cloud_dir / relative_path
        cloud_file.upload_from(local_path)

# Download: the same walk in reverse; local parent directories are
# created before each file is written.
for cloud_file in cloud_dir.rglob("*"):
    if cloud_file.is_file():
        relative_path = cloud_file.relative_to(cloud_dir)
        local_path = pathlib.Path("downloads") / relative_path
        local_path.parent.mkdir(parents=True, exist_ok=True)
        cloud_file.download_to(local_path)

Cloud-to-Cloud Copying

# Cross-provider copy: S3 to Google Cloud Storage. The bytes are
# staged through the local cache on the way across.
aws_source = CloudPath("s3://source-bucket/file.txt")
gcs_target = CloudPath("gs://dest-bucket/file.txt")
aws_source.copy(gcs_target)

# Same-provider copy: between two prefixes in one S3 bucket.
src = CloudPath("s3://my-bucket/source/file.txt")
dst = CloudPath("s3://my-bucket/backup/file.txt")
src.copy(dst)

Directory Copying

# Whole-tree copy in one call.
source_dir = CloudPath("s3://source-bucket/data/")
dest_dir = CloudPath("s3://backup-bucket/data-backup/")

# Copy directory tree
source_dir.copytree(dest_dir)

# Selective copy: walk the tree and copy only matching files.
source_dir = CloudPath("s3://my-bucket/project/")
backup_dir = CloudPath("s3://backup-bucket/project-backup/")

# Only copy specific file types.
# NOTE(review): rglob("*") also yields directory entries; the suffix
# filter skips them unless a directory name happens to end in one of
# these suffixes -- confirm if that edge case matters here.
for file_path in source_dir.rglob("*"):
    if file_path.suffix in ['.py', '.md', '.txt']:
        relative_path = file_path.relative_to(source_dir)
        dest_path = backup_dir / relative_path
        file_path.copy(dest_path)

URL Generation

# Plain URL: only usable if the object is publicly readable.
cloud_path = CloudPath("s3://public-bucket/image.jpg")
public_url = cloud_path.as_url()
print(f"Public URL: {public_url}")

# Presigned URL: grants temporary access to a private object.
private_path = CloudPath("s3://private-bucket/document.pdf")
signed_url = private_path.as_url(presign=True, expire_seconds=3600)
print(f"Temporary URL (1 hour): {signed_url}")

# Shorter-lived presigned URL.
temp_url = private_path.as_url(presign=True, expire_seconds=300)  # 5 minutes
print(f"Short-term URL: {temp_url}")

Cache Management

# Caching: fspath transparently downloads and caches the object.
cloud_path = CloudPath("s3://my-bucket/large-file.dat")

# Get local cached path (downloads on first access).
local_cache_path = cloud_path.fspath
print(f"Cached at: {local_cache_path}")

# The cached copy is an ordinary local file.
with open(local_cache_path, 'rb') as f:
    data = f.read(1024)

# Drop the local copy to free disk space.
cloud_path.clear_cache()

# Accessing fspath again re-downloads a fresh copy.
fresh_cache_path = cloud_path.fspath

File Movement and Deletion

# NOTE(review): CloudPathFileNotFoundError was not imported by any
# earlier example (only CloudPath is), so the except clause below would
# raise NameError as written. It is the exception this page's unlink()
# documentation names -- confirm the exact import path against the
# installed cloudpathlib version.
from cloudpathlib.exceptions import CloudPathFileNotFoundError

# Move/rename files
old_path = CloudPath("s3://my-bucket/old-name.txt")
new_path = CloudPath("s3://my-bucket/new-name.txt")

# Rename file
renamed = old_path.rename(new_path)
print(f"Renamed to: {renamed}")

# Move to different directory, overwriting any existing target
source = CloudPath("s3://my-bucket/temp/file.txt")
target = CloudPath("s3://my-bucket/archive/file.txt")
moved = source.replace(target)

# Delete files
file_to_delete = CloudPath("s3://my-bucket/unwanted.txt")
file_to_delete.unlink()

# Safe deletion: the file was already removed above, so this second
# unlink raises and demonstrates the error-handling path.
try:
    file_to_delete.unlink()
except CloudPathFileNotFoundError:
    print("File already deleted")

# Safe deletion with missing_ok: no exception even when absent
file_to_delete.unlink(missing_ok=True)

Batch Operations

# Batch upload: push every local CSV under one cloud prefix.
local_files = pathlib.Path("local_data/").glob("*.csv")
cloud_base = CloudPath("s3://my-bucket/csv-data/")

for local_file in local_files:
    cloud_file = cloud_base / local_file.name
    cloud_file.upload_from(local_file)
    print(f"Uploaded {local_file.name}")

# Batch download: pull every JSON result into a local directory.
cloud_files = CloudPath("s3://my-bucket/results/").glob("*.json")
local_base = pathlib.Path("downloads/")
local_base.mkdir(exist_ok=True)

for cloud_file in cloud_files:
    local_file = local_base / cloud_file.name
    cloud_file.download_to(local_file)
    print(f"Downloaded {cloud_file.name}")

Monitoring Transfer Progress

def upload_with_progress(local_path, cloud_path):
    """Upload a file, logging its size before and after.

    Note: this prints a size summary only; it does not report
    incremental transfer progress.
    """
    file_size = local_path.stat().st_size
    print(f"Uploading {local_path.name} ({file_size} bytes)")
    
    # Upload file
    result = cloud_path.upload_from(local_path)
    print(f"Upload complete: {result}")
    return result

def download_with_progress(cloud_path, local_path):
    """Download a file, logging the downloaded size afterwards.

    Note: like upload_with_progress, this is summary logging, not
    incremental progress reporting.
    """
    print(f"Downloading {cloud_path.name}")
    
    # Download file; download_to returns a pathlib.Path we can stat.
    result = cloud_path.download_to(local_path)
    downloaded_size = result.stat().st_size
    print(f"Downloaded {downloaded_size} bytes to {result}")
    return result

# Usage
local_file = pathlib.Path("data.csv")
cloud_file = CloudPath("s3://my-bucket/data.csv")

upload_with_progress(local_file, cloud_file)
download_with_progress(cloud_file, "downloaded.csv")

Working with Temporary URLs

# requests is a third-party dependency, not part of cloudpathlib.
import requests

# Generate a temporary download URL for a private object.
cloud_path = CloudPath("s3://private-bucket/report.pdf")
download_url = cloud_path.as_url(presign=True, expire_seconds=1800)  # 30 minutes

# Any HTTP client can use the presigned URL directly.
# NOTE(review): no status check here -- consider
# response.raise_for_status() before writing the body.
response = requests.get(download_url)
with open("downloaded_report.pdf", "wb") as f:
    f.write(response.content)

# Generate upload URL (for some cloud providers)
# NOTE(review): presigned URLs are typically bound to an HTTP method;
# a URL signed for GET may not accept uploads -- confirm whether
# as_url() can sign for PUT on the target provider.
upload_path = CloudPath("s3://uploads-bucket/new-file.txt")
upload_url = upload_path.as_url(presign=True, expire_seconds=600)  # 10 minutes

# External service can upload directly to this URL
print(f"Upload URL: {upload_url}")

Advanced Copy Operations

# Copy with metadata preservation
source = CloudPath("s3://source/important.doc")
destination = CloudPath("s3://backup/important.doc")

# Copy with cloud-specific options (varies by provider)
# NOTE(review): keyword options like preserve_metadata are forwarded as
# **kwargs; whether the provider backend accepts this one is not shown
# here -- confirm against the provider client's copy API.
source.copy(destination, preserve_metadata=True)

# Conditional copy: only overwrite when the source is newer.
# NOTE(review): exists()/stat() then copy() is not atomic; the remote
# object can change in between.
if not destination.exists() or source.stat().st_mtime > destination.stat().st_mtime:
    print("Source is newer, copying...")
    source.copy(destination)
else:
    print("Destination is up to date")

# Copy with different storage class (cloud-specific)
source.copy(destination, storage_class='GLACIER')  # S3 example

Install with Tessl CLI

npx tessl i tessl/pypi-cloudpathlib

docs

anypath.md

azure-integration.md

client-management.md

cloud-operations.md

configuration.md

core-operations.md

directory-operations.md

exceptions.md

file-io.md

gcs-integration.md

http-support.md

index.md

patching.md

s3-integration.md

tile.json