Pathlib-style classes for cloud storage services that provide seamless access to AWS S3, Google Cloud Storage, and Azure Blob Storage with familiar filesystem operations.
—
Quality: Pending — Does it follow best practices?
Impact: Pending — No eval scenarios have been run.
Directory management capabilities including creation, deletion, listing, traversal, and pattern matching. These operations provide cloud-native directory handling that works consistently across different cloud storage services, even those without traditional directory concepts.
Check if paths exist and determine their types.
def exists(self) -> bool:
    """
    Check if path exists in cloud storage.

    Returns:
        True if path exists, False otherwise.
    """
def is_file(self) -> bool:
    """
    Check if path is a file.

    Returns:
        True if path points to a file, False otherwise.
    """
def is_dir(self) -> bool:
    """
    Check if path is a directory.

    Returns:
        True if path points to a directory, False otherwise.
    """
Create directories with flexible parent handling.
def mkdir(
    self,
    parents: bool = False,
    exist_ok: bool = False
) -> None:
    """
    Create directory.

    Args:
        parents: Create parent directories if needed.
        exist_ok: Don't raise an error if the directory already exists.

    Raises:
        CloudPathFileExistsError: Directory exists and exist_ok=False.
        CloudPathNotExistsError: Parent doesn't exist and parents=False.
    """
List directory contents and iterate over files and subdirectories.
def iterdir(self) -> typing.Iterator["CloudPath"]:
    """
    Iterate over directory contents.

    Returns:
        Iterator of CloudPath objects for the directory's contents.

    Raises:
        CloudPathNotADirectoryError: Path is not a directory.
    """
Remove directories and directory trees.
def rmdir(self) -> None:
    """
    Remove empty directory.

    Raises:
        DirectoryNotEmptyError: Directory contains files.
        CloudPathNotExistsError: Directory doesn't exist.
    """
def rmtree(self) -> None:
    """
    Remove directory tree recursively.

    Removes all files and subdirectories beneath this path.
    """
Find files and directories using glob patterns.
def glob(self, pattern: str) -> typing.Iterator["CloudPath"]:
    """
    Find paths matching glob pattern.

    Args:
        pattern: Glob pattern (e.g., "*.txt", "data/*").

    Returns:
        Iterator of matching CloudPath objects.
    """
def rglob(self, pattern: str) -> typing.Iterator["CloudPath"]:
    """
    Find paths matching pattern recursively.

    Args:
        pattern: Glob pattern to match.

    Returns:
        Iterator of matching CloudPath objects in all subdirectories.
    """
Traverse directory trees with full control over traversal order.
def walk(
    self,
    top_down: bool = True
) -> typing.Iterator[typing.Tuple["CloudPath", typing.List[str], typing.List[str]]]:
    """
    Walk directory tree.

    Args:
        top_down: Visit directories top-down if True, bottom-up if False.

    Returns:
        Iterator of (directory_path, subdirectory_names, file_names) tuples.
    """
from cloudpathlib import CloudPath
# Check if directory exists
data_dir = CloudPath("s3://my-bucket/data/")
if data_dir.exists():
    print("Directory exists")

# Check path type
if data_dir.is_dir():
    print("This is a directory")
elif data_dir.is_file():
    print("This is a file")
# Create single directory
folder = CloudPath("s3://my-bucket/new-folder/")
folder.mkdir(exist_ok=True)

# Create nested directories
deep_path = CloudPath("s3://my-bucket/level1/level2/level3/")
deep_path.mkdir(parents=True, exist_ok=True)

# Handle creation errors
try:
    folder.mkdir()
except CloudPathFileExistsError:
    print("Directory already exists")
# List all items in directory
dir_path = CloudPath("s3://my-bucket/data/")

# Print and collect files and directories in a single pass
files = []
dirs = []
for item in dir_path.iterdir():
    if item.is_file():
        print(f"File: {item.name}")
        files.append(item)
    elif item.is_dir():
        print(f"Directory: {item.name}")
        dirs.append(item)
# Find all text files
base_path = CloudPath("s3://my-bucket/")

# Shallow match: top level only
txt_files = list(base_path.glob("*.txt"))
print(f"Found {len(txt_files)} .txt files")

# Deep match: every subdirectory
all_txt_files = list(base_path.rglob("*.txt"))
print(f"Found {len(all_txt_files)} .txt files recursively")

# More involved patterns
csv_files = list(base_path.glob("data/**/*.csv"))
log_files = list(base_path.rglob("logs/*.log"))
# Multiple file extensions
base_path = CloudPath("s3://my-bucket/")

# Find multiple types with one comprehension
data_files = [
    match
    for pattern in ("*.csv", "*.json", "*.parquet")
    for match in base_path.rglob(pattern)
]

# Find files with specific naming
report_files = list(base_path.glob("reports/report-*.pdf"))
dated_logs = list(base_path.glob("logs/2024-*/access.log"))
# Walk entire directory tree
base_path = CloudPath("s3://my-bucket/data/")

# Show the whole tree, one directory per block
for root, dirs, files in base_path.walk():
    print(f"Directory: {root}")
    print(f"  Subdirectories: {dirs}")
    print(f"  Files: {files}")
    print()

# Process all files recursively; skip anything that isn't a .txt
for root, _subdirs, files in base_path.walk():
    for name in files:
        file_path = root / name
        if file_path.suffix != '.txt':
            continue
        process_text_file(file_path)
# Create directory structure
base = CloudPath("s3://my-bucket/project/")

# Build the layout data-driven; parents=True only where a new chain starts
for parts, need_parents in (
    (("src",), True),
    (("tests",), False),
    (("docs",), False),
    (("data", "raw"), True),
    (("data", "processed"), False),
):
    target = base
    for part in parts:
        target = target / part
    target.mkdir(parents=need_parents, exist_ok=True)

# Remove directory tree
old_project = CloudPath("s3://my-bucket/old-project/")
if old_project.exists():
    old_project.rmtree()
from cloudpathlib import DirectoryNotEmptyError, CloudPathNotExistsError
dir_path = CloudPath("s3://my-bucket/temp/")

# Safe directory removal: try the strict form first, escalate if needed
try:
    dir_path.rmdir()
except DirectoryNotEmptyError:
    print("Directory not empty, use rmtree() to remove recursively")
    dir_path.rmtree()
except CloudPathNotExistsError:
    print("Directory doesn't exist")

# Check before operations
if dir_path.exists() and dir_path.is_dir():
    # Safe to perform directory operations
    for entry in dir_path.iterdir():
        print(entry)
# Find files by extension
base_path = CloudPath("s3://my-bucket/")

# All Python files
py_files = list(base_path.rglob("*.py"))

# All image files, gathered with a single comprehension
image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.gif"]
images = [
    hit
    for ext in image_extensions
    for hit in base_path.rglob(ext)
]
# Find configuration files
config_files = list(base_path.rglob("config.*"))

def get_directory_size(dir_path):
    """Calculate the total size of a directory tree.

    Args:
        dir_path: A CloudPath (or pathlib-like) directory exposing
            ``walk()``, ``/`` joining, and per-file ``stat()``.

    Returns:
        Tuple of (total_size_in_bytes, file_count).
    """
    total_size = 0
    file_count = 0
    # Subdirectory names aren't needed; walk() visits every level for us.
    for root, _dirs, files in dir_path.walk():
        for filename in files:
            file_path = root / filename
            try:
                # Only the stat() call can fail; keep the try body minimal.
                stats = file_path.stat()
            except Exception:
                # Best-effort: skip files that vanished or can't be stat'ed.
                continue
            total_size += stats.st_size
            file_count += 1
    return total_size, file_count

# Usage
dir_path = CloudPath("s3://my-bucket/data/")
size, count = get_directory_size(dir_path)
print(f"Directory contains {count} files totaling {size} bytes")
# Organize files by type
source_dir = CloudPath("s3://my-bucket/uploads/")
target_base = CloudPath("s3://my-bucket/organized/")
# Create organization structure
(target_base / "images").mkdir(parents=True, exist_ok=True)
(target_base / "documents").mkdir(exist_ok=True)
(target_base / "data").mkdir(exist_ok=True)
# Organize by file type
for file_path in source_dir.rglob("*"):
if file_path.is_file():
if file_path.suffix.lower() in ['.jpg', '.png', '.gif']:
target = target_base / "images" / file_path.name
elif file_path.suffix.lower() in ['.pdf', '.doc', '.txt']:
target = target_base / "documents" / file_path.name
elif file_path.suffix.lower() in ['.csv', '.json', '.xml']:
target = target_base / "data" / file_path.name
else:
continue
file_path.copy(target)Install with Tessl CLI
npx tessl i tessl/pypi-cloudpathlib