Pathlib-style classes for cloud storage services that provide seamless access to AWS S3, Google Cloud Storage, and Azure Blob Storage with familiar filesystem operations.
—
Quality: Pending — a best-practices review has not yet been completed.
Impact: Pending — no eval scenarios have been run.
AnyPath provides intelligent dispatching between cloud paths and local filesystem paths, enabling code that works seamlessly with both local and cloud storage. This universal interface allows you to write path-agnostic code that automatically handles different storage backends.
Polymorphic constructor that automatically dispatches to the appropriate path type.
class AnyPath:
"""Universal path constructor."""
def __new__(
cls,
*args,
**kwargs
) -> typing.Union[CloudPath, "pathlib.Path"]:
"""
Create appropriate path type based on input.
Args:
*args: Path arguments
**kwargs: Additional arguments
Returns:
CloudPath instance for cloud URIs, pathlib.Path for local paths
"""
@classmethod
def validate(cls, v):
"""
Pydantic validator for AnyPath instances.
Args:
v: Value to validate
Returns:
Validated path object
"""Utility functions for path conversion and handling.
def to_anypath(
    s: typing.Union[str, "os.PathLike"]
) -> typing.Union[CloudPath, "pathlib.Path"]:
    """
    Convert a string or PathLike object to the appropriate path type.

    Args:
        s: String or path-like object

    Returns:
        CloudPath for cloud URIs, pathlib.Path for local paths
    """
from cloudpathlib import AnyPath
# AnyPath inspects the argument and returns the matching concrete type.
cloud_path = AnyPath("s3://my-bucket/file.txt")
print(type(cloud_path))  # <class 'cloudpathlib.s3.S3Path'>

local_path = AnyPath("/home/user/file.txt")
print(type(local_path))  # <class 'pathlib.PosixPath'>

windows_path = AnyPath("C:\\Users\\user\\file.txt")
print(type(windows_path))  # <class 'pathlib.WindowsPath'>

# The same constructor covers every supported cloud provider.
gcs_path = AnyPath("gs://my-bucket/file.txt")
azure_path = AnyPath("az://my-container/file.txt")
http_path = AnyPath("https://example.com/file.txt")

def process_file(path_str):
    """Process a file regardless of where it is stored."""
    path = AnyPath(path_str)
    # Guard first: the identical existence check works for local and cloud paths.
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")
    content = path.read_text()
    processed = content.upper()
    # Write the result next to the input, with a suffixed stem.
    output_path = path.with_stem(path.stem + "_processed")
    output_path.write_text(processed)
    return output_path

# Works with any path type
local_result = process_file("/tmp/local_file.txt")
s3_result = process_file("s3://bucket/cloud_file.txt")
gcs_result = process_file("gs://bucket/gcs_file.txt")
import os
from pathlib import Path
def get_data_path(filename):
"""Get data path based on environment configuration."""
storage_type = os.getenv("STORAGE_TYPE", "local")
if storage_type == "local":
base_path = os.getenv("LOCAL_DATA_DIR", "./data")
return AnyPath(base_path) / filename
elif storage_type == "s3":
bucket = os.getenv("S3_BUCKET", "default-bucket")
return AnyPath(f"s3://{bucket}/data") / filename
elif storage_type == "gcs":
bucket = os.getenv("GCS_BUCKET", "default-bucket")
return AnyPath(f"gs://{bucket}/data") / filename
else:
raise ValueError(f"Unknown storage type: {storage_type}")
# Usage - works with any configured storage
data_file = get_data_path("dataset.csv")
print(f"Using: {data_file}")
# Read/write operations work the same regardless of backend
if data_file.exists():
data = data_file.read_text()
else:
data_file.write_text("id,name,value\n1,test,100")def process_file_list(file_paths):
"""Process list of files from different storage locations."""
results = []
for path_str in file_paths:
path = AnyPath(path_str)
print(f"Processing {path} (type: {type(path).__name__})")
if path.exists():
# Same operations work for all path types
size = path.stat().st_size
modified = path.stat().st_mtime
results.append({
'path': str(path),
'type': type(path).__name__,
'size': size,
'modified': modified
})
else:
print(f"Skipping non-existent file: {path}")
return results
# Mix of local and cloud paths
mixed_paths = [
"/home/user/local_file.txt",
"s3://my-bucket/s3_file.txt",
"gs://my-bucket/gcs_file.txt",
"az://my-container/azure_file.txt",
"C:\\Users\\user\\windows_file.txt"
]
results = process_file_list(mixed_paths)
for result in results:
print(f"{result['type']}: {result['path']} ({result['size']} bytes)")class DataPipeline:
"""Data pipeline that works with any storage backend."""
def __init__(self, input_path, output_path, temp_dir=None):
    """Coerce every configured location to a path object up front."""
    self.input_path = AnyPath(input_path)
    self.output_path = AnyPath(output_path)
    self.temp_dir = AnyPath(temp_dir) if temp_dir else None
def process(self):
    """Run the pipeline end to end and return the output path."""
    print(f"Input: {self.input_path} ({type(self.input_path).__name__})")
    print(f"Output: {self.output_path} ({type(self.output_path).__name__})")

    # Read and transform the input data.
    raw_data = self.input_path.read_text()
    processed_data = self.transform_data(raw_data)

    # Optionally stage an intermediate copy in the temp directory.
    if self.temp_dir:
        temp_file = self.temp_dir / f"temp_{self.input_path.name}"
        temp_file.parent.mkdir(parents=True, exist_ok=True)
        temp_file.write_text(processed_data)
        print(f"Temp file: {temp_file}")

    # Write final output
    self.output_path.parent.mkdir(parents=True, exist_ok=True)
    self.output_path.write_text(processed_data)
    return self.output_path
def transform_data(self, data):
    """Prefix each line of *data* with a 'PROCESSED:' marker (example transform)."""
    stripped_lines = data.strip().split('\n')
    return '\n'.join(f"PROCESSED: {line}" for line in stripped_lines)
# Works with any combination of storage types: cloud in, local out, cloud temp.
pipeline1 = DataPipeline(
    input_path="s3://source-bucket/raw_data.txt",
    output_path="/tmp/processed_data.txt",
    temp_dir="gs://temp-bucket/pipeline-temp/"
)
pipeline2 = DataPipeline(
    input_path="/home/user/input.txt",
    output_path="az://output-container/result.txt"
)

# Same interface, different storage backends
result1 = pipeline1.process()
result2 = pipeline2.process()

def resolve_path(path_spec):
    """Resolve a path specification to an actual path object.

    A dict spec selects the backend via its 'type' key plus backend-specific
    fields; anything else is passed straight to AnyPath.

    Raises:
        ValueError: if a dict spec names an unknown storage type.
        KeyError: if a dict spec is missing a required field.
    """
    if isinstance(path_spec, dict):
        # Dynamic path specification
        storage_type = path_spec.get('type', 'local')
        if storage_type == 'local':
            base_dir = path_spec.get('base_dir', '.')
            filename = path_spec['filename']
            return AnyPath(base_dir) / filename
        elif storage_type == 's3':
            bucket = path_spec['bucket']
            key = path_spec['key']
            return AnyPath(f"s3://{bucket}/{key}")
        elif storage_type == 'gcs':
            bucket = path_spec['bucket']
            blob = path_spec['blob']
            return AnyPath(f"gs://{bucket}/{blob}")
        elif storage_type == 'azure':
            container = path_spec['container']
            blob = path_spec['blob']
            return AnyPath(f"az://{container}/{blob}")
        else:
            # Previously an unknown type fell through every branch and
            # silently returned None; fail loudly like get_data_path does.
            raise ValueError(f"Unknown storage type: {storage_type}")
    else:
        # Direct path specification
        return AnyPath(path_spec)
# Example path specifications
path_specs = [
    "/direct/local/path.txt",
    "s3://direct-bucket/file.txt",
    {
        'type': 'local',
        'base_dir': '/home/user/data',
        'filename': 'config.json'
    },
    {
        'type': 's3',
        'bucket': 'my-data-bucket',
        'key': 'processed/results.csv'
    },
    {
        'type': 'gcs',
        'bucket': 'analytics-bucket',
        'blob': 'reports/monthly.pdf'
    }
]

# Resolve every specification and show what each one became.
resolved_paths = list(map(resolve_path, path_specs))
for original, resolved in zip(path_specs, resolved_paths):
    print(f"{original} -> {resolved} ({type(resolved).__name__})")
import tempfile
import pytest

class TestDataProcessor:
    """Exercise the data processor against several storage backends."""

    def setup_test_data(self, storage_type="local"):
        """Create a small test file on the requested backend and return its path."""
        if storage_type == "local":
            scratch_dir = tempfile.mkdtemp()
            test_file = AnyPath(scratch_dir) / "test_data.txt"
        elif storage_type == "s3":
            # Cloud targets are expected to be configured via environment variables.
            test_file = AnyPath("s3://test-bucket/test_data.txt")
        elif storage_type == "gcs":
            test_file = AnyPath("gs://test-bucket/test_data.txt")
        else:
            pytest.skip(f"Storage type {storage_type} not configured for testing")
        # Identical setup code regardless of backend.
        test_file.write_text("line1\nline2\nline3")
        return test_file

    @pytest.mark.parametrize("storage_type", ["local", "s3", "gcs"])
    def test_file_processing(self, storage_type):
        """End-to-end processing should behave the same on every backend."""
        test_file = self.setup_test_data(storage_type)
        result = process_file(str(test_file))
        # The processed file exists and its content was upper-cased.
        assert result.exists()
        content = result.read_text()
        assert "LINE1" in content  # Assuming process_file converts to uppercase
        # Only local artifacts need cleanup here.
        if storage_type == "local":
            result.unlink()
            test_file.unlink()
# Usage in configuration management
def load_config(config_path_spec):
    """Load a JSON configuration from any supported storage location.

    Returns a default configuration when the path does not exist.
    """
    # json is never imported elsewhere on this page, so the original call to
    # json.loads would raise NameError; import locally to keep this self-contained.
    import json

    config_path = AnyPath(config_path_spec)
    if config_path.exists():
        return json.loads(config_path.read_text())
    else:
        # Return default config
        return {"default": True}

# Works with any path type
local_config = load_config("./config.json")
s3_config = load_config("s3://config-bucket/prod-config.json")
gcs_config = load_config("gs://config-bucket/staging-config.json")
from cloudpathlib import to_anypath
# Convert heterogeneous inputs to the appropriate path types.
paths = [
    "/local/file.txt",
    "s3://bucket/file.txt",
    Path("/another/local/file.txt"),
    "gs://bucket/data.json",
    "https://example.com/api/data"
]
converted_paths = [to_anypath(p) for p in paths]
for original, converted in zip(paths, converted_paths):
    print(f"{original} -> {type(converted).__name__}")

# Use in functions that accept string or path objects
def safe_read_file(path_input):
    """Read a file's text from a string or path object; return None on failure."""
    path = to_anypath(path_input)
    try:
        return path.read_text()
    except Exception as e:
        # Best-effort read: report the error and continue rather than propagate.
        print(f"Error reading {path}: {e}")
        return None

# Works with any input type
content1 = safe_read_file("/tmp/file.txt")
content2 = safe_read_file("s3://bucket/file.txt")
content3 = safe_read_file(Path("/home/user/file.txt"))
from pydantic import BaseModel
from cloudpathlib import AnyPath
class DataConfig(BaseModel):
"""Configuration model with path validation."""
input_path: AnyPath
output_path: AnyPath
temp_dir: AnyPath = None
class Config:
# Allow AnyPath types
arbitrary_types_allowed = True
# Validation works with any path type
config_data = {
"input_path": "s3://source-bucket/data.csv",
"output_path": "/tmp/processed.csv",
"temp_dir": "gs://temp-bucket/workspace/"
}
config = DataConfig(**config_data)
print(f"Input: {config.input_path} ({type(config.input_path).__name__})")
print(f"Output: {config.output_path} ({type(config.output_path).__name__})")
print(f"Temp: {config.temp_dir} ({type(config.temp_dir).__name__})")
# Use validated paths
if config.input_path.exists():
data = config.input_path.read_text()
config.output_path.write_text(data.upper())Install with Tessl CLI
npx tessl i tessl/pypi-cloudpathlib