Pathlib-style classes for cloud storage services that provide seamless access to AWS S3, Google Cloud Storage, and Azure Blob Storage with familiar filesystem operations.
npx @tessl/cli install tessl/pypi-cloudpathlib@0.22.0
A comprehensive Python library that extends pathlib functionality to work seamlessly with cloud storage services. CloudPathLib provides a familiar pathlib-like interface for cloud URIs, enabling developers to read, write, and manipulate files in cloud storage using the same intuitive patterns they use for local filesystem operations.
pip install cloudpathlib[all] (or specify cloud providers: [s3,gs,azure])
from cloudpathlib import CloudPath, AnyPath, implementation_registry
For specific cloud providers:
from cloudpathlib import S3Path, GSPath, AzureBlobPath
from cloudpathlib import S3Client, GSClient, AzureBlobClient
from cloudpathlib import CloudPath
# Works with any supported cloud service
# Automatically dispatches to appropriate implementation based on URI prefix
s3_path = CloudPath("s3://my-bucket/file.txt")
gs_path = CloudPath("gs://my-bucket/file.txt")
azure_path = CloudPath("az://my-container/file.txt")
# Familiar pathlib-style operations
with s3_path.open("w") as f:
    f.write("Hello cloud storage!")
# Read content
content = s3_path.read_text()
# Path operations
parent = s3_path.parent
filename = s3_path.name
new_path = s3_path / "subdirectory" / "another_file.txt"
# Directory operations
s3_path.parent.mkdir(parents=True, exist_ok=True)
for item in s3_path.parent.iterdir():
    print(item)
# Pattern matching
for txt_file in CloudPath("s3://my-bucket/").glob("**/*.txt"):
    print(txt_file)
CloudPathLib follows a modular architecture with three key layers:
This design enables seamless switching between cloud providers, local filesystem implementations for testing, and integration with existing pathlib-based code through familiar interfaces.
Essential pathlib-compatible operations for working with cloud storage paths, including path construction, manipulation, and filesystem operations like reading, writing, and directory management.
class CloudPath:
def __init__(self, cloud_path: str, *parts: str, client=None): ...
def __truediv__(self, other: str) -> "CloudPath": ...
def joinpath(self, *pathsegments: str) -> "CloudPath": ...
def with_name(self, name: str) -> "CloudPath": ...
def with_suffix(self, suffix: str) -> "CloudPath": ...
def with_stem(self, stem: str) -> "CloudPath": ...
@property
def name(self) -> str: ...
@property
def stem(self) -> str: ...
@property
def suffix(self) -> str: ...
@property
def parent(self) -> "CloudPath": ...
@property
def parts(self) -> tuple: ...
Comprehensive file input/output capabilities with support for text and binary modes, streaming, and cloud-specific optimizations for efficient data transfer.
def open(
self,
mode: str = "r",
buffering: int = -1,
encoding: typing.Optional[str] = None,
errors: typing.Optional[str] = None,
newline: typing.Optional[str] = None,
force_overwrite_from_cloud: typing.Optional[bool] = None,
force_overwrite_to_cloud: typing.Optional[bool] = None
) -> typing.IO: ...
def read_text(self, encoding: str = None, errors: str = None) -> str: ...
def read_bytes(self) -> bytes: ...
def write_text(self, data: str, encoding: str = None, errors: str = None) -> int: ...
def write_bytes(self, data: bytes) -> int: ...
Directory management including creation, deletion, listing, traversal, and pattern matching with glob support for recursive searches.
def exists(self) -> bool: ...
def is_file(self) -> bool: ...
def is_dir(self) -> bool: ...
def iterdir(self) -> typing.Iterator["CloudPath"]: ...
def mkdir(self, parents: bool = False, exist_ok: bool = False) -> None: ...
def rmdir(self) -> None: ...
def rmtree(self) -> None: ...
def glob(self, pattern: str) -> typing.Iterator["CloudPath"]: ...
def rglob(self, pattern: str) -> typing.Iterator["CloudPath"]: ...
def walk(self, top_down: bool = True) -> typing.Iterator[tuple]: ...
Advanced cloud storage features including URL generation, presigned URLs, file copying, upload/download, caching, and cloud service metadata access.
def download_to(self, destination: typing.Union[str, "os.PathLike"]) -> "pathlib.Path": ...
def upload_from(self, source: typing.Union[str, "os.PathLike"]) -> "CloudPath": ...
def copy(self, destination: typing.Union[str, "CloudPath"]) -> "CloudPath": ...
def copytree(self, destination: typing.Union[str, "CloudPath"]) -> "CloudPath": ...
def as_url(self, presign: bool = False, expire_seconds: int = 3600) -> str: ...
def clear_cache(self) -> None: ...
def stat(self) -> "os.stat_result": ...
Complete AWS S3 support with advanced features including multipart uploads, transfer acceleration, custom endpoints, and S3-specific metadata access.
class S3Path(CloudPath):
@property
def bucket(self) -> str: ...
@property
def key(self) -> str: ...
@property
def etag(self) -> str: ...
class S3Client:
def __init__(
self,
aws_access_key_id: str = None,
aws_secret_access_key: str = None,
aws_session_token: str = None,
profile_name: str = None,
boto3_session = None,
**kwargs
): ...
Full Google Cloud Storage support with service account authentication, custom retry policies, concurrent downloads, and GCS-specific features.
class GSPath(CloudPath):
@property
def bucket(self) -> str: ...
@property
def blob(self) -> str: ...
@property
def etag(self) -> str: ...
class GSClient:
def __init__(
self,
credentials = None,
project: str = None,
storage_client = None,
**kwargs
): ...
Google Cloud Storage Integration
Azure Blob Storage support with Azure Active Directory authentication, hierarchical namespace support for ADLS Gen2, and Azure-specific blob operations.
class AzureBlobPath(CloudPath):
@property
def container(self) -> str: ...
@property
def blob(self) -> str: ...
@property
def etag(self) -> str: ...
class AzureBlobClient:
def __init__(
self,
account_url: str = None,
credential = None,
connection_string: str = None,
**kwargs
): ...
Azure Blob Storage Integration
HTTP and HTTPS resource access with custom authentication, directory listing parsers, and RESTful operations for web-based storage systems.
class HttpPath(CloudPath):
def get(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]: ...
def put(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]: ...
def post(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]: ...
def delete(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]: ...
def head(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]: ...
class HttpClient:
def __init__(
self,
auth = None,
custom_list_page_parser = None,
custom_dir_matcher = None,
**kwargs
): ...
AnyPath provides intelligent dispatching between cloud paths and local filesystem paths, enabling code that works seamlessly with both local and cloud storage.
class AnyPath:
def __new__(cls, *args, **kwargs) -> typing.Union[CloudPath, "pathlib.Path"]: ...
@classmethod
def validate(cls, v): ...
def to_anypath(s: typing.Union[str, "os.PathLike"]) -> typing.Union[CloudPath, "pathlib.Path"]: ...
Monkey patching capabilities to make Python's built-in functions work transparently with cloud paths, including open(), os functions, and glob operations.
def patch_open(original_open = None) -> None: ...
def patch_os_functions() -> None: ...
def patch_glob() -> None: ...
def patch_all_builtins() -> None: ...
Base client functionality for authentication, caching configuration, and cloud service connection management across all supported providers.
class Client:
def __init__(
self,
file_cache_mode: FileCacheMode = None,
local_cache_dir: str = None,
content_type_method = None
): ...
@classmethod
def get_default_client(cls): ...
def set_as_default_client(self) -> None: ...
def clear_cache(self) -> None: ...
Configuration options for cache management, file handling modes, and other library settings that control behavior across all cloud providers.
class FileCacheMode(str, Enum):
persistent = "persistent"
tmp_dir = "tmp_dir"
cloudpath_object = "cloudpath_object"
close_file = "close_file"
@classmethod
def from_environment(cls): ...
# Implementation registry for cloud provider management
implementation_registry: typing.Dict[str, "CloudImplementation"]
Comprehensive exception hierarchy for precise error handling across different cloud providers and operation types, with specific exceptions for common cloud storage scenarios.
class CloudPathException(Exception): ...
class CloudPathFileNotFoundError(CloudPathException, FileNotFoundError): ...
class MissingCredentialsError(CloudPathException): ...
class InvalidPrefixError(CloudPathException): ...
# ... and 15+ more specific exception types