Pathlib-style classes for cloud storage services that provide seamless access to AWS S3, Google Cloud Storage, and Azure Blob Storage with familiar filesystem operations.
—
Quality: pending — this package has not yet been reviewed against best practices.
Impact: pending — no eval scenarios have been run.
Comprehensive exception hierarchy for precise error handling across different cloud providers and operation types. CloudPathLib provides specific exceptions for common cloud storage scenarios, enabling robust error handling and debugging.
The full hierarchy is shown below; catching the `CloudPathException` base class handles any error raised by the library.
# CloudPathLib exception hierarchy. Every exception derives from
# CloudPathException, so `except CloudPathException` catches any library error.
# Several classes also inherit the matching builtin OSError subclass
# (e.g. FileNotFoundError) so code written against the builtins keeps working.


class CloudPathException(Exception):
    """Base exception for all CloudPathLib errors."""


class AnyPathTypeError(CloudPathException):
    """Raised when AnyPath receives invalid input type."""


class ClientMismatchError(CloudPathException):
    """Raised when wrong client type is used for a path."""


class CloudPathFileExistsError(CloudPathException, FileExistsError):
    """Raised when attempting to create a file that already exists."""


class CloudPathFileNotFoundError(CloudPathException, FileNotFoundError):
    """Raised when attempting to access a file that does not exist."""


class CloudPathIsADirectoryError(CloudPathException, IsADirectoryError):
    """Raised when file operation is attempted on a directory."""


class CloudPathNotADirectoryError(CloudPathException, NotADirectoryError):
    """Raised when directory operation is attempted on a file."""


class CloudPathNotExistsError(CloudPathException):
    """Raised when attempting to access a path that does not exist."""


class CloudPathNotImplementedError(CloudPathException, NotImplementedError):
    """Raised when an operation is not supported by the cloud provider."""


class DirectoryNotEmptyError(CloudPathException):
    """Raised when attempting to remove a non-empty directory."""


class IncompleteImplementationError(CloudPathException):
    """Raised when cloud implementation is missing required components."""


class InvalidPrefixError(CloudPathException):
    """Raised when an invalid cloud path prefix is used."""


class InvalidConfigurationException(CloudPathException):
    """Raised when client configuration is invalid."""


class MissingCredentialsError(CloudPathException):
    """Raised when required authentication credentials are missing."""


class MissingDependenciesError(CloudPathException):
    """Raised when required cloud provider dependencies are not installed."""


class NoStatError(CloudPathException):
    """Raised when file statistics cannot be retrieved."""


class OverwriteDirtyFileError(CloudPathException):
    """Raised when local cached file has been modified and conflicts with cloud version."""


class OverwriteNewerCloudError(CloudPathException):
    """Raised when cloud file is newer than local cached version."""


class OverwriteNewerLocalError(CloudPathException):
    """Raised when local cached file is newer than cloud version."""


class InvalidGlobArgumentsError(CloudPathException):
    """Raised when invalid arguments are passed to glob operations."""


from cloudpathlib import (
    CloudPath,
    CloudPathException,  # FIX: needed by the generic handler in safe_read_file
    CloudPathFileNotFoundError,
    MissingCredentialsError,
    InvalidPrefixError,
)
def safe_read_file(path_str):
    """Safely read a cloud file, returning its text or None on failure.

    Handlers are ordered most-specific first; CloudPathException is the
    library-wide base class, so it must come last.
    """
    try:
        path = CloudPath(path_str)
        return path.read_text()
    except CloudPathFileNotFoundError:
        print(f"File not found: {path_str}")
        return None
    except MissingCredentialsError:
        print("Authentication credentials not configured")
        return None
    except InvalidPrefixError:
        print(f"Invalid cloud path prefix: {path_str}")
        return None
    except CloudPathException as e:
        print(f"CloudPath error: {e}")
        return None


# Usage
content = safe_read_file("s3://my-bucket/file.txt")
# FIX: compare against None -- an empty file is a successful read but is
# falsy, so the previous `if content:` reported it as a failure.
if content is not None:
    print("File read successfully")


from cloudpathlib import (
    CloudPath,
    CloudPathFileNotFoundError,  # FIX: used by robust_cloud_operation below
    MissingCredentialsError,     # FIX: used by robust_cloud_operation below
)
import boto3.exceptions
import botocore.exceptions  # FIX: NoCredentialsError is defined in botocore
import google.api_core.exceptions
import azure.core.exceptions
def robust_cloud_operation(path_str):
    """Read a cloud file, handling library and provider-specific errors.

    Exception handlers are ordered subclass-before-superclass. The original
    listed Boto3Error / GoogleAPIError / AzureError before their subclasses,
    which made the more specific handlers unreachable.
    """
    # Local imports keep this snippet self-contained.
    from cloudpathlib import CloudPathFileNotFoundError, MissingCredentialsError
    import botocore.exceptions  # FIX: NoCredentialsError lives in botocore, not boto3.exceptions

    try:
        path = CloudPath(path_str)
        return path.read_text()
    # CloudPathLib-specific exceptions
    except CloudPathFileNotFoundError:
        print("File not found in cloud storage")
    except MissingCredentialsError:
        print("Cloud credentials not configured")
    # AWS-specific exceptions (specific before general)
    except botocore.exceptions.NoCredentialsError:
        print("AWS credentials not found")
    except boto3.exceptions.Boto3Error as e:
        print(f"AWS error: {e}")
    # Google Cloud-specific exceptions
    except google.api_core.exceptions.PermissionDenied:
        print("Google Cloud permission denied")
    except google.api_core.exceptions.GoogleAPIError as e:
        print(f"Google Cloud error: {e}")
    # Azure-specific exceptions
    except azure.core.exceptions.ClientAuthenticationError:
        print("Azure authentication failed")
    except azure.core.exceptions.AzureError as e:
        print(f"Azure error: {e}")
    # Generic exceptions
    except PermissionError:
        print("Permission denied")
    except Exception as e:
        print(f"Unexpected error: {e}")
    return None


from cloudpathlib import (
    CloudPath,
    CloudPathFileExistsError,
    CloudPathIsADirectoryError,
    CloudPathNotADirectoryError,  # FIX: used by remove_directory_safe below
    CloudPathFileNotFoundError,   # FIX: used by remove_directory_safe below
    DirectoryNotEmptyError,
)
def safe_file_operations():
    """Demonstrate error handling for file creation and directory removal."""
    # FIX: these names are referenced in except clauses below but were never
    # imported; a local import keeps the example self-contained.
    from cloudpathlib import (
        CloudPathFileNotFoundError,
        CloudPathNotADirectoryError,
    )

    # Safe file creation
    def create_file_safe(path_str, content):
        try:
            path = CloudPath(path_str)
            # Check if file already exists
            if path.exists():
                response = input(f"File {path} exists. Overwrite? (y/n): ")
                if response.lower() != 'y':
                    return False
            path.write_text(content)
            return True
        except CloudPathIsADirectoryError:
            print(f"Error: {path_str} is a directory, not a file")
            return False
        except PermissionError:
            print(f"Error: No permission to write to {path_str}")
            return False

    # Safe directory removal
    def remove_directory_safe(path_str):
        try:
            path = CloudPath(path_str)
            path.rmdir()
            return True
        except CloudPathNotADirectoryError:
            print(f"Error: {path_str} is not a directory")
            return False
        except DirectoryNotEmptyError:
            print(f"Error: Directory {path_str} is not empty")
            # Offer to remove recursively
            response = input("Remove recursively? (y/n): ")
            if response.lower() == 'y':
                path.rmtree()
                return True
            return False
        except CloudPathFileNotFoundError:
            print(f"Directory {path_str} does not exist")
            return False

    # Usage examples
    create_file_safe("s3://bucket/file.txt", "Hello, world!")
    remove_directory_safe("s3://bucket/empty-dir/")


safe_file_operations()


import time
import random
from cloudpathlib import (
    CloudPath,
    CloudPathException,
    CloudPathFileNotFoundError,  # FIX: used by retry_cloud_operation below
    CloudPathNotExistsError,     # FIX: used by retry_cloud_operation below
    MissingCredentialsError,     # FIX: used by retry_cloud_operation below
)
def retry_cloud_operation(func, max_retries=3, backoff_factor=1.0):
    """Retry a cloud operation with exponential backoff plus jitter.

    Args:
        func: Zero-argument callable performing the cloud operation.
        max_retries: Total number of attempts; must be >= 1.
        backoff_factor: Base delay multiplier in seconds.

    Returns:
        Whatever ``func`` returns on the first successful attempt.

    Raises:
        ValueError: If ``max_retries`` < 1.
        CloudPathFileNotFoundError / CloudPathNotExistsError /
        MissingCredentialsError: Re-raised immediately (not retryable).
        CloudPathException: Re-raised after the final failed attempt.
    """
    if max_retries < 1:
        # FIX: previously the loop body never ran and the function silently
        # returned None when max_retries < 1.
        raise ValueError("max_retries must be >= 1")
    for attempt in range(max_retries):
        try:
            return func()
        except (CloudPathFileNotFoundError, CloudPathNotExistsError):
            # Don't retry for missing files
            raise
        except MissingCredentialsError:
            # Don't retry for credential issues
            raise
        except CloudPathException as e:
            if attempt == max_retries - 1:
                # Last attempt, re-raise the exception
                raise
            # Exponential backoff with up to 1 second of random jitter
            backoff_time = backoff_factor * (2 ** attempt) + random.uniform(0, 1)
            print(f"Attempt {attempt + 1} failed: {e}")
            print(f"Retrying in {backoff_time:.2f} seconds...")
            time.sleep(backoff_time)
        except Exception as e:
            # For non-CloudPath exceptions, only retry once.
            # FIX: also re-raise when this was the final attempt, instead of
            # sleeping and silently falling off the loop returning None.
            if attempt == 0 and attempt < max_retries - 1:
                print(f"Unexpected error: {e}, retrying once...")
                time.sleep(1)
            else:
                raise


# Usage
def upload_file_with_retry():
    def upload_operation():
        path = CloudPath("s3://unreliable-bucket/file.txt")
        path.write_text("Important data")
        return path

    try:
        result = retry_cloud_operation(upload_operation, max_retries=3)
        print(f"Upload successful: {result}")
    except Exception as e:
        print(f"Upload failed after retries: {e}")


upload_file_with_retry()


from cloudpathlib import (
    S3Client,
    GSClient,
    AzureBlobClient,
    InvalidConfigurationException,
    MissingDependenciesError,
)
def configure_client_safe(provider, **config):
    """Safely configure a cloud client, returning None on failure.

    Args:
        provider: One of "s3", "gs", or "azure".
        **config: Keyword arguments forwarded to the client constructor.

    Returns:
        The configured client instance, or None if configuration failed.
    """
    # Dispatch table is easier to extend than an if/elif chain.
    client_classes = {
        "s3": S3Client,
        "gs": GSClient,
        "azure": AzureBlobClient,
    }
    try:
        client_cls = client_classes.get(provider)
        if client_cls is None:
            raise ValueError(f"Unknown provider: {provider}")
        return client_cls(**config)
    except MissingDependenciesError as e:
        print(f"Missing dependencies for {provider}: {e}")
        print(f"Install with: pip install cloudpathlib[{provider}]")
        return None
    except InvalidConfigurationException as e:
        print(f"Invalid configuration for {provider}: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error configuring {provider} client: {e}")
        return None


# Usage
configs = {
    "s3": {"aws_access_key_id": "key", "aws_secret_access_key": "secret"},
    "gs": {"application_credentials": "path/to/creds.json"},
    "azure": {"connection_string": "connection_string"},
}
clients = {}
for provider, config in configs.items():
    client = configure_client_safe(provider, **config)
    if client:
        clients[provider] = client
        print(f"{provider} client configured successfully")


from cloudpathlib import (
    CloudPath,
    CloudPathException,
    CloudPathFileNotFoundError,  # FIX: used by process_files_batch below
)
from concurrent.futures import ThreadPoolExecutor, as_completed
def process_files_batch(file_paths, process_func, max_workers=5):
    """Process multiple cloud files concurrently with per-file error handling.

    Args:
        file_paths: Iterable of cloud path strings.
        process_func: Callable applied to each CloudPath object.
        max_workers: Thread pool size (default 5, preserving prior behavior).

    Returns:
        Tuple of (successful result dicts, failed result dicts); each dict
        carries "path", "success", and either "result" or "error".
    """
    # FIX: self-contained import -- CloudPathFileNotFoundError is referenced
    # in an except clause but was not imported at the top of this example.
    from cloudpathlib import CloudPathFileNotFoundError

    results = []
    errors = []

    def safe_process(path_str):
        # Never raises: every failure is folded into a result dict so one bad
        # file cannot abort the whole batch.
        try:
            path = CloudPath(path_str)
            result = process_func(path)
            return {"path": path_str, "result": result, "success": True}
        except CloudPathFileNotFoundError:
            return {"path": path_str, "error": "File not found", "success": False}
        except PermissionError:
            return {"path": path_str, "error": "Permission denied", "success": False}
        except CloudPathException as e:
            return {"path": path_str, "error": str(e), "success": False}
        except Exception as e:
            return {"path": path_str, "error": f"Unexpected: {e}", "success": False}

    # Process files concurrently
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_path = {
            executor.submit(safe_process, path): path
            for path in file_paths
        }
        for future in as_completed(future_to_path):
            result = future.result()
            if result["success"]:
                results.append(result)
            else:
                errors.append(result)
                print(f"Error processing {result['path']}: {result['error']}")
    return results, errors


# Usage
def read_file_size(path):
    """Example processing function."""
    return path.stat().st_size


file_list = [
    "s3://bucket/file1.txt",
    "s3://bucket/file2.txt",
    "s3://bucket/nonexistent.txt",
    "gs://bucket/file3.txt",
]
successful_results, failed_results = process_files_batch(file_list, read_file_size)
print(f"Successfully processed {len(successful_results)} files")
print(f"Failed to process {len(failed_results)} files")


from cloudpathlib import (
    CloudPathException,
    CloudPathFileNotFoundError,  # FIX: raised by process_data_file below
)
class DataProcessingError(CloudPathException):
    """Custom exception for data processing errors.

    Deriving from CloudPathException lets callers that already catch the
    library base class handle these errors too.
    """
    pass


class InvalidDataFormatError(DataProcessingError):
    """Raised when data format is invalid."""
    pass


def process_data_file(path_str):
    """Process a data file, writing an uppercased copy alongside it.

    Returns:
        The CloudPath of the "_processed" output file.

    Raises:
        InvalidDataFormatError: If the file is empty or lacks the
            DATA_VERSION header.
        DataProcessingError: For underlying cloud storage failures,
            chained from the original CloudPathException.
    """
    # FIX: self-contained imports -- CloudPath and CloudPathFileNotFoundError
    # are used below but were not imported by this example.
    from cloudpathlib import CloudPath, CloudPathFileNotFoundError

    try:
        path = CloudPath(path_str)
        if not path.exists():
            raise CloudPathFileNotFoundError(f"Data file not found: {path_str}")
        content = path.read_text()
        # Custom validation
        if not content.strip():
            raise InvalidDataFormatError(f"Data file is empty: {path_str}")
        if not content.startswith("DATA_VERSION"):
            raise InvalidDataFormatError(f"Invalid data format: {path_str}")
        # Process data...
        processed = content.upper()
        output_path = path.with_stem(path.stem + "_processed")
        output_path.write_text(processed)
        return output_path
    except InvalidDataFormatError as e:
        print(f"Data format error: {e}")
        raise  # Re-raise custom exception
    except DataProcessingError as e:
        print(f"Data processing error: {e}")
        raise
    except CloudPathException as e:
        print(f"Cloud storage error: {e}")
        raise DataProcessingError(f"Failed to process {path_str}") from e


# Usage with custom exception handling
try:
    result = process_data_file("s3://data-bucket/dataset.txt")
    print(f"Processing complete: {result}")
except InvalidDataFormatError:
    print("Fix data format and try again")
except DataProcessingError:
    print("Data processing failed")
except Exception as e:
    print(f"Unexpected error: {e}")


import logging
from cloudpathlib import (
    CloudPath,
    CloudPathException,
    CloudPathFileNotFoundError,  # FIX: used by monitored_cloud_operation below
    MissingCredentialsError,     # FIX: used by monitored_cloud_operation below
)
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def monitored_cloud_operation(path_str, operation):
    """Perform a cloud operation ("read", "exists", or "stat") with logging.

    Raises:
        ValueError: For an unknown operation name.
        Any cloud storage error is logged and re-raised for the caller.
    """
    # FIX: self-contained import -- these exception names are referenced in
    # except clauses but were not imported by this example.
    from cloudpathlib import CloudPathFileNotFoundError, MissingCredentialsError

    # Lazy %-style arguments avoid formatting when the log level is disabled.
    logger.info("Starting %s on %s", operation, path_str)
    try:
        path = CloudPath(path_str)
        if operation == "read":
            result = path.read_text()
            logger.info("Successfully read %d characters from %s", len(result), path_str)
            return result
        elif operation == "exists":
            result = path.exists()
            logger.info("Existence check for %s: %s", path_str, result)
            return result
        elif operation == "stat":
            result = path.stat()
            logger.info("File stats for %s: size=%s", path_str, result.st_size)
            return result
        else:
            raise ValueError(f"Unknown operation: {operation}")
    # FIX: dropped the unused `as e` bindings on the first two handlers.
    except CloudPathFileNotFoundError:
        logger.warning("File not found during %s: %s", operation, path_str)
        raise
    except MissingCredentialsError:
        logger.error("Credentials missing for %s on %s", operation, path_str)
        raise
    except CloudPathException as e:
        logger.error("CloudPath error during %s on %s: %s", operation, path_str, e)
        raise
    except Exception as e:
        logger.error("Unexpected error during %s on %s: %s", operation, path_str, e)
        raise


# Usage with monitoring
try:
    content = monitored_cloud_operation("s3://bucket/file.txt", "read")
    stats = monitored_cloud_operation("s3://bucket/file.txt", "stat")
except CloudPathException as e:
    logger.error("Operation failed: %s", e)


from cloudpathlib import (
    CloudPath,
    CloudPathException,  # FIX: used by the resilient_data_access caller below
    CloudPathFileNotFoundError,
)
def resilient_data_access(primary_path, backup_paths=None):
    """Read data from the primary location, falling back to backups in order.

    Args:
        primary_path: Preferred cloud path string.
        backup_paths: Optional list of fallback path strings, tried in order.

    Returns:
        The file contents from the first accessible location.

    Raises:
        CloudPathFileNotFoundError: If every location fails; chained from
            the last underlying error.
    """
    backup_paths = backup_paths or []
    all_paths = [primary_path] + backup_paths
    last_exception = None
    for i, path_str in enumerate(all_paths):
        try:
            path = CloudPath(path_str)
            content = path.read_text()
            if i > 0:
                print(f"Successfully accessed backup path: {path_str}")
            return content
        except CloudPathFileNotFoundError as e:
            last_exception = e
            if i < len(all_paths) - 1:
                # FIX: the original always said "Primary path failed" even
                # when a backup path was the one that failed.
                source = "Primary path" if i == 0 else f"Backup path {path_str}"
                print(f"{source} failed, trying backup: {all_paths[i + 1]}")
            continue
        except CloudPathException as e:
            last_exception = e
            print(f"Error accessing {path_str}: {e}")
            continue
    # All paths failed; chain the last error so the cause survives (FIX:
    # previously the raise discarded the exception chain).
    raise CloudPathFileNotFoundError(
        f"Could not access data from any location. Last error: {last_exception}"
    ) from last_exception


# Usage
try:
    data = resilient_data_access(
        primary_path="s3://primary-bucket/data.txt",
        backup_paths=[
            "s3://backup-bucket/data.txt",
            "gs://backup-bucket/data.txt",
            # NOTE(review): CloudPath rejects local paths with
            # InvalidPrefixError; use AnyPath when mixing local and cloud.
            "/local/backup/data.txt",
        ]
    )
    print("Data accessed successfully")
except CloudPathException as e:
    print(f"All data sources failed: {e}")
Install with the Tessl CLI: `npx tessl i tessl/pypi-cloudpathlib`