Pathlib-style classes for cloud storage services that provide seamless access to AWS S3, Google Cloud Storage, and Azure Blob Storage with familiar filesystem operations.
—
Quality: Pending — Does it follow best practices?
Impact: Pending — No eval scenarios have been run.
Directory management capabilities including creation, deletion, listing, traversal, and pattern matching. These operations provide cloud-native directory handling that works consistently across different cloud storage services, even those without traditional directory concepts.
Check if paths exist and determine their types.
def exists(self) -> bool:
    """
    Check if path exists in cloud storage.

    Returns:
        True if path exists, False otherwise.
    """
def is_file(self) -> bool:
    """
    Check if path is a file.

    Returns:
        True if path points to a file, False otherwise.
    """
def is_dir(self) -> bool:
    """
    Check if path is a directory.

    Returns:
        True if path points to a directory, False otherwise.
    """
Create directories with flexible parent handling.
def mkdir(
    self,
    parents: bool = False,
    exist_ok: bool = False
) -> None:
    """
    Create directory.

    Args:
        parents: Create parent directories if needed.
        exist_ok: Don't raise an error if the directory already exists.

    Raises:
        CloudPathFileExistsError: Directory exists and exist_ok=False.
        CloudPathNotExistsError: Parent doesn't exist and parents=False.
    """
List directory contents and iterate over files and subdirectories.
def iterdir(self) -> typing.Iterator["CloudPath"]:
    """
    Iterate over directory contents.

    Returns:
        Iterator of CloudPath objects for the directory's contents.

    Raises:
        CloudPathNotADirectoryError: Path is not a directory.
    """
Remove directories and directory trees.
def rmdir(self) -> None:
    """
    Remove empty directory.

    Raises:
        DirectoryNotEmptyError: Directory contains files.
        CloudPathNotExistsError: Directory doesn't exist.
    """
def rmtree(self) -> None:
    """
    Remove directory tree recursively.

    Removes all files and subdirectories beneath this path.
    """
Find files and directories using glob patterns.
def glob(self, pattern: str) -> typing.Iterator["CloudPath"]:
    """
    Find paths matching glob pattern.

    Args:
        pattern: Glob pattern (e.g., "*.txt", "data/*").

    Returns:
        Iterator of matching CloudPath objects.
    """
def rglob(self, pattern: str) -> typing.Iterator["CloudPath"]:
    """
    Find paths matching pattern recursively.

    Args:
        pattern: Glob pattern to match.

    Returns:
        Iterator of matching CloudPath objects in all subdirectories.
    """
Traverse directory trees with full control over traversal order.
def walk(
    self,
    top_down: bool = True
) -> typing.Iterator[typing.Tuple["CloudPath", typing.List[str], typing.List[str]]]:
    """
    Walk directory tree.

    Args:
        top_down: Visit directories top-down if True, bottom-up if False.

    Returns:
        Iterator of (directory_path, subdirectory_names, file_names) tuples.
    """
from cloudpathlib import CloudPath
# Check if directory exists
data_dir = CloudPath("s3://my-bucket/data/")
if data_dir.exists():
    print("Directory exists")

# Check path type
if data_dir.is_dir():
    print("This is a directory")
elif data_dir.is_file():
    print("This is a file")
# Create single directory
folder = CloudPath("s3://my-bucket/new-folder/")
folder.mkdir(exist_ok=True)

# Create nested directories
deep_path = CloudPath("s3://my-bucket/level1/level2/level3/")
deep_path.mkdir(parents=True, exist_ok=True)

# Handle creation errors
try:
    folder.mkdir()
except CloudPathFileExistsError:
    print("Directory already exists")
# List all items in directory
dir_path = CloudPath("s3://my-bucket/data/")

# Print and collect files and directories in a single pass
files = []
dirs = []
for item in dir_path.iterdir():
    if item.is_file():
        print(f"File: {item.name}")
        files.append(item)
    elif item.is_dir():
        print(f"Directory: {item.name}")
        dirs.append(item)
# Find all text files
base_path = CloudPath("s3://my-bucket/")

# Shallow match: top level only
txt_files = list(base_path.glob("*.txt"))
print(f"Found {len(txt_files)} .txt files")

# Deep match: every subdirectory
all_txt_files = list(base_path.rglob("*.txt"))
print(f"Found {len(all_txt_files)} .txt files recursively")

# More involved patterns
csv_files = list(base_path.glob("data/**/*.csv"))
log_files = list(base_path.rglob("logs/*.log"))
# Multiple file extensions
base_path = CloudPath("s3://my-bucket/")

# Find multiple types with one comprehension
data_files = [
    match
    for pattern in ("*.csv", "*.json", "*.parquet")
    for match in base_path.rglob(pattern)
]

# Find files with specific naming
report_files = list(base_path.glob("reports/report-*.pdf"))
dated_logs = list(base_path.glob("logs/2024-*/access.log"))
# Walk entire directory tree
base_path = CloudPath("s3://my-bucket/data/")

# Show the whole tree, one directory per block
for root, dirs, files in base_path.walk():
    print(f"Directory: {root}")
    print(f"  Subdirectories: {dirs}")
    print(f"  Files: {files}")
    print()

# Process all files recursively; skip anything that isn't a .txt
for root, _subdirs, files in base_path.walk():
    for name in files:
        file_path = root / name
        if file_path.suffix != '.txt':
            continue
        process_text_file(file_path)
# Create directory structure
base = CloudPath("s3://my-bucket/project/")

# Build the layout data-driven; parents=True only where a new chain starts
for parts, need_parents in (
    (("src",), True),
    (("tests",), False),
    (("docs",), False),
    (("data", "raw"), True),
    (("data", "processed"), False),
):
    target = base
    for part in parts:
        target = target / part
    target.mkdir(parents=need_parents, exist_ok=True)

# Remove directory tree
old_project = CloudPath("s3://my-bucket/old-project/")
if old_project.exists():
    old_project.rmtree()
from cloudpathlib import DirectoryNotEmptyError, CloudPathNotExistsError
dir_path = CloudPath("s3://my-bucket/temp/")

# Safe directory removal: try the strict form first, escalate if needed
try:
    dir_path.rmdir()
except DirectoryNotEmptyError:
    print("Directory not empty, use rmtree() to remove recursively")
    dir_path.rmtree()
except CloudPathNotExistsError:
    print("Directory doesn't exist")

# Check before operations
if dir_path.exists() and dir_path.is_dir():
    # Safe to perform directory operations
    for entry in dir_path.iterdir():
        print(entry)
# Find files by extension
base_path = CloudPath("s3://my-bucket/")

# All Python files
py_files = list(base_path.rglob("*.py"))

# All image files, gathered with a single comprehension
image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.gif"]
images = [
    hit
    for ext in image_extensions
    for hit in base_path.rglob(ext)
]
# Find configuration files
config_files = list(base_path.rglob("config.*"))

def get_directory_size(dir_path):
    """Calculate the total size of a directory tree.

    Args:
        dir_path: A CloudPath (or pathlib-like) directory exposing
            ``walk()``, ``/`` joining, and per-file ``stat()``.

    Returns:
        Tuple of (total_size_in_bytes, file_count).
    """
    total_size = 0
    file_count = 0
    # Subdirectory names aren't needed; walk() visits every level for us.
    for root, _dirs, files in dir_path.walk():
        for filename in files:
            file_path = root / filename
            try:
                # Only the stat() call can fail; keep the try body minimal.
                stats = file_path.stat()
            except Exception:
                # Best-effort: skip files that vanished or can't be stat'ed.
                continue
            total_size += stats.st_size
            file_count += 1
    return total_size, file_count

# Usage
dir_path = CloudPath("s3://my-bucket/data/")
size, count = get_directory_size(dir_path)
print(f"Directory contains {count} files totaling {size} bytes")
# Organize files by type
source_dir = CloudPath("s3://my-bucket/uploads/")
target_base = CloudPath("s3://my-bucket/organized/")
# Create organization structure
(target_base / "images").mkdir(parents=True, exist_ok=True)
(target_base / "documents").mkdir(exist_ok=True)
(target_base / "data").mkdir(exist_ok=True)
# Organize by file type
for file_path in source_dir.rglob("*"):
if file_path.is_file():
if file_path.suffix.lower() in ['.jpg', '.png', '.gif']:
target = target_base / "images" / file_path.name
elif file_path.suffix.lower() in ['.pdf', '.doc', '.txt']:
target = target_base / "documents" / file_path.name
elif file_path.suffix.lower() in ['.csv', '.json', '.xml']:
target = target_base / "data" / file_path.name
else:
continue
file_path.copy(target)Install with Tessl CLI
npx tessl i tessl/pypi-cloudpathlib