Pathlib-style classes for cloud storage services that provide seamless access to AWS S3, Google Cloud Storage, and Azure Blob Storage with familiar filesystem operations.
—
Quality: Pending — Does it follow best practices?
Impact: Pending — No eval scenarios have been run.
Advanced cloud storage features that go beyond traditional filesystem operations. These capabilities leverage cloud-native features like URL generation, presigned URLs, efficient copying, upload/download operations, caching management, and cloud service metadata access.
Transfer files between local filesystem and cloud storage.
def download_to(
    self,
    destination: typing.Union[str, "os.PathLike"]
) -> "pathlib.Path":
    """
    Download this cloud file to a local destination.

    Args:
        destination: Local path to write the downloaded file to.

    Returns:
        pathlib.Path pointing at the downloaded local file.
    """
def upload_from(
    self,
    source: typing.Union[str, "os.PathLike"],
    **kwargs
) -> "CloudPath":
    """
    Upload a local file to this cloud storage location.

    Args:
        source: Local file path to upload.
        **kwargs: Cloud-specific upload options passed through to the provider.

    Returns:
        CloudPath for the uploaded file.
    """

Copy files and directories within and between cloud storage locations.
def copy(
    self,
    destination: typing.Union[str, "CloudPath"],
    **kwargs
) -> "CloudPath":
    """
    Copy this file to the given destination.

    Args:
        destination: Target path for the copy.
        **kwargs: Cloud-specific copy options passed through to the provider.

    Returns:
        CloudPath for the copied file.
    """
def copytree(
    self,
    destination: typing.Union[str, "CloudPath"],
    **kwargs
) -> "CloudPath":
    """
    Copy this directory tree to the given destination.

    Args:
        destination: Target path for the directory copy.
        **kwargs: Cloud-specific copy options passed through to the provider.

    Returns:
        CloudPath for the copied directory.
    """

Generate URLs for cloud resources with optional presigning.
def as_url(
    self,
    presign: bool = False,
    expire_seconds: int = 3600
) -> str:
    """
    Get a URL for this cloud resource.

    Args:
        presign: If True, generate a presigned URL granting temporary access.
        expire_seconds: Lifetime of a presigned URL, in seconds
            (only meaningful when presign=True).

    Returns:
        URL string for the cloud resource.
    """

Control local file caching behavior.
def clear_cache(self) -> None:
    """
    Clear the local cache for this path.

    Removes any locally cached copy of the file.
    """
@property
def fspath(self) -> str:
    """
    Local filesystem path of the cached copy of this file.

    Accessing this property downloads and caches the file if it is
    not already cached.

    Returns:
        Local path string for the cached file.
    """

Move and delete cloud files and directories.
def unlink(self, missing_ok: bool = False) -> None:
    """
    Delete this file from cloud storage.

    Args:
        missing_ok: If True, do not raise when the file does not exist.

    Raises:
        CloudPathFileNotFoundError: File not found and missing_ok=False.
    """
def rename(self, target: typing.Union[str, "CloudPath"]) -> "CloudPath":
    """
    Rename/move this file to the target location.

    Args:
        target: New path for the file.

    Returns:
        CloudPath for the renamed file.
    """
def replace(self, target: typing.Union[str, "CloudPath"]) -> "CloudPath":
    """
    Replace the target path with this file.

    Args:
        target: Path to replace.

    Returns:
        CloudPath for the replacement location.
    """

from cloudpathlib import CloudPath
import pathlib

# Upload a local file to cloud storage
local_file = pathlib.Path("local_data.csv")
cloud_path = CloudPath("s3://my-bucket/data.csv")

# Upload file
cloud_path.upload_from(local_file)
print(f"Uploaded to {cloud_path}")

# Download the file back from the cloud
downloaded_path = cloud_path.download_to("downloaded_data.csv")
print(f"Downloaded to {downloaded_path}")

# Upload entire directory
local_dir = pathlib.Path("local_project/")
cloud_dir = CloudPath("s3://my-bucket/project/")

# Upload directory tree: walk every local file and mirror its
# path relative to local_dir under cloud_dir
for local_path in local_dir.rglob("*"):
    if local_path.is_file():
        relative_path = local_path.relative_to(local_dir)
        cloud_file = cloud_dir / relative_path
        cloud_file.upload_from(local_path)

# Download directory tree: mirror the cloud layout under "downloads/"
for cloud_file in cloud_dir.rglob("*"):
    if cloud_file.is_file():
        relative_path = cloud_file.relative_to(cloud_dir)
        local_path = pathlib.Path("downloads") / relative_path
        # Ensure the destination directory exists before downloading
        local_path.parent.mkdir(parents=True, exist_ok=True)
        cloud_file.download_to(local_path)

# Copy between cloud providers
s3_path = CloudPath("s3://source-bucket/file.txt")
gs_path = CloudPath("gs://dest-bucket/file.txt")

# Copy file between clouds (via local cache)
s3_path.copy(gs_path)

# Copy within the same cloud service
source = CloudPath("s3://my-bucket/source/file.txt")
destination = CloudPath("s3://my-bucket/backup/file.txt")
source.copy(destination)

# Copy entire directory tree
source_dir = CloudPath("s3://source-bucket/data/")
dest_dir = CloudPath("s3://backup-bucket/data-backup/")

# Copy directory tree
source_dir.copytree(dest_dir)

# Copy with filtering
source_dir = CloudPath("s3://my-bucket/project/")
backup_dir = CloudPath("s3://backup-bucket/project-backup/")

# Only copy specific file types, preserving relative layout
for file_path in source_dir.rglob("*"):
    if file_path.suffix in ['.py', '.md', '.txt']:
        relative_path = file_path.relative_to(source_dir)
        dest_path = backup_dir / relative_path
        file_path.copy(dest_path)

# Get public URL
cloud_path = CloudPath("s3://public-bucket/image.jpg")
public_url = cloud_path.as_url()
print(f"Public URL: {public_url}")

# Generate presigned URL for temporary access
private_path = CloudPath("s3://private-bucket/document.pdf")
signed_url = private_path.as_url(presign=True, expire_seconds=3600)
print(f"Temporary URL (1 hour): {signed_url}")

# Short-term presigned URL
temp_url = private_path.as_url(presign=True, expire_seconds=300)  # 5 minutes
print(f"Short-term URL: {temp_url}")

# Work with cached files
cloud_path = CloudPath("s3://my-bucket/large-file.dat")

# Get local cached path (downloads if needed)
local_cache_path = cloud_path.fspath
print(f"Cached at: {local_cache_path}")

# Work with the local copy like any ordinary file
with open(local_cache_path, 'rb') as f:
    data = f.read(1024)

# Clear cache when done
cloud_path.clear_cache()

# Accessing fspath again re-downloads since the cache was cleared
fresh_cache_path = cloud_path.fspath

# Move/rename files
old_path = CloudPath("s3://my-bucket/old-name.txt")
new_path = CloudPath("s3://my-bucket/new-name.txt")

# Rename file
renamed = old_path.rename(new_path)
print(f"Renamed to: {renamed}")

# Move to a different directory
source = CloudPath("s3://my-bucket/temp/file.txt")
target = CloudPath("s3://my-bucket/archive/file.txt")
moved = source.replace(target)

# Delete files
file_to_delete = CloudPath("s3://my-bucket/unwanted.txt")
file_to_delete.unlink()

# Safe deletion: the file was just deleted above, so this raises
# NOTE(review): CloudPathFileNotFoundError is not imported in these
# examples — presumably from cloudpathlib.exceptions; confirm.
try:
    file_to_delete.unlink()
except CloudPathFileNotFoundError:
    print("File already deleted")

# Safe deletion with missing_ok
file_to_delete.unlink(missing_ok=True)

# Batch upload
local_files = pathlib.Path("local_data/").glob("*.csv")
cloud_base = CloudPath("s3://my-bucket/csv-data/")
for local_file in local_files:
    cloud_file = cloud_base / local_file.name
    cloud_file.upload_from(local_file)
    print(f"Uploaded {local_file.name}")

# Batch download
cloud_files = CloudPath("s3://my-bucket/results/").glob("*.json")
local_base = pathlib.Path("downloads/")
local_base.mkdir(exist_ok=True)
for cloud_file in cloud_files:
    local_file = local_base / cloud_file.name
    cloud_file.download_to(local_file)
    print(f"Downloaded {cloud_file.name}")

def upload_with_progress(local_path, cloud_path):
    """Upload a local file and print its size before and after the upload.

    Args:
        local_path: pathlib.Path of the local file to upload.
        cloud_path: CloudPath destination for the upload.

    Returns:
        CloudPath of the uploaded file (as returned by upload_from).
    """
    file_size = local_path.stat().st_size
    print(f"Uploading {local_path.name} ({file_size} bytes)")
    # Upload file
    result = cloud_path.upload_from(local_path)
    print(f"Upload complete: {result}")
    return result
def download_with_progress(cloud_path, local_path):
    """Download a cloud file and print the number of bytes written.

    Args:
        cloud_path: CloudPath of the file to download.
        local_path: Local destination path for the download.

    Returns:
        pathlib.Path of the downloaded file (as returned by download_to).
    """
    print(f"Downloading {cloud_path.name}")
    # Download file
    result = cloud_path.download_to(local_path)
    downloaded_size = result.stat().st_size
    print(f"Downloaded {downloaded_size} bytes to {result}")
    return result
# Usage of the progress helpers defined above
local_file = pathlib.Path("data.csv")
cloud_file = CloudPath("s3://my-bucket/data.csv")
upload_with_progress(local_file, cloud_file)
download_with_progress(cloud_file, "downloaded.csv")

import requests
# Generate temporary download URL
cloud_path = CloudPath("s3://private-bucket/report.pdf")
download_url = cloud_path.as_url(presign=True, expire_seconds=1800)  # 30 minutes

# Use the URL with external tools (any HTTP client works)
response = requests.get(download_url)
with open("downloaded_report.pdf", "wb") as f:
    f.write(response.content)

# Generate upload URL (for some cloud providers)
upload_path = CloudPath("s3://uploads-bucket/new-file.txt")
upload_url = upload_path.as_url(presign=True, expire_seconds=600)  # 10 minutes

# External service can upload directly to this URL
print(f"Upload URL: {upload_url}")

# Copy with metadata preservation
source = CloudPath("s3://source/important.doc")
destination = CloudPath("s3://backup/important.doc")

# Copy with cloud-specific options (varies by provider)
# NOTE(review): preserve_metadata is forwarded via **kwargs — confirm the
# target provider's client accepts it.
source.copy(destination, preserve_metadata=True)

# Conditional copying based on modification time
if not destination.exists() or source.stat().st_mtime > destination.stat().st_mtime:
    print("Source is newer, copying...")
    source.copy(destination)
else:
    print("Destination is up to date")

# Copy with a different storage class (cloud-specific)
source.copy(destination, storage_class='GLACIER')  # S3 example

Install with Tessl CLI
npx tessl i tessl/pypi-cloudpathlib