CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-etils

A collection of common Python utilities for machine learning and scientific computing workflows

Pending
Overview
Eval results
Files

docs/path-operations.md

Path Operations (epath)

The epath module provides a pathlib-compatible API that extends standard file operations to cloud storage systems, including Google Cloud Storage (gs://), AWS S3 (s3://), and other remote filesystems. It is built on top of Python's pathlib, so the same calls work for local and cloud paths.

Capabilities

Core Path Class

The main Path class provides a unified interface for local and cloud file operations.

class Path:
    """
    Pathlib-compatible path for local and cloud storage.

    Supports local paths, gs://, s3://, and other cloud storage systems.

    This is an API summary: every method body is elided (``...``), and each
    member is declared exactly once.
    """
    def __init__(self, path: str | PathLike) -> None: ...

    # File content operations
    def read_text(self, encoding: str = 'utf-8') -> str: ...
    def write_text(
        self,
        data: str,
        encoding: str | None = None,
        errors: str | None = None
    ) -> int: ...
    def read_bytes(self) -> bytes: ...
    def write_bytes(self, data: bytes) -> int: ...

    # File system operations
    def exists(self) -> bool: ...
    def is_file(self) -> bool: ...
    def is_dir(self) -> bool: ...
    # Single declaration of mkdir. `mode` is the first parameter after self,
    # so pass `parents` / `exist_ok` by keyword: a positional
    # `mkdir(True)` would bind to `mode`, not `parents`.
    def mkdir(self, mode: Optional[int] = None, parents: bool = False, exist_ok: bool = False) -> None: ...
    def rmdir(self) -> None: ...
    def unlink(self, missing_ok: bool = False) -> None: ...

    # Path navigation
    def glob(self, pattern: str) -> Iterator[Path]: ...
    def rglob(self, pattern: str) -> Iterator[Path]: ...
    def iterdir(self) -> Iterator[Path]: ...

    # Path properties
    @property
    def parent(self) -> Path: ...
    @property
    def name(self) -> str: ...
    @property
    def stem(self) -> str: ...
    @property
    def suffix(self) -> str: ...
    @property
    def parts(self) -> tuple[str, ...]: ...

    # File and directory operations
    def rmtree(self, missing_ok: bool = False) -> None: ...
    def touch(self, mode: int = 0o666, exist_ok: bool = True) -> None: ...
    def rename(self, target: PathLike) -> Path: ...
    def replace(self, target: PathLike) -> Path: ...
    def copy(self, dst: PathLike, overwrite: bool = False) -> Path: ...
    def stat(self) -> StatResult: ...
    # `top_down` and `on_error` are keyword-only (note the bare `*`).
    def walk(self, *, top_down: bool = True, on_error: Optional[Callable[[OSError], Any]] = None) -> Iterator[tuple[Path, list[str], list[str]]]: ...

    # Advanced path operations
    def open(self, mode: str = 'r', encoding: Optional[str] = None, errors: Optional[str] = None, **kwargs) -> Any: ...
    def expanduser(self) -> Path: ...
    def resolve(self, strict: bool = False) -> Path: ...
    def format(self, *args, **kwargs) -> Path: ...

    # Inherited pathlib methods
    def is_absolute(self) -> bool: ...
    def is_reserved(self) -> bool: ...
    def as_posix(self) -> str: ...
    def as_uri(self) -> str: ...
    def match(self, pattern: str) -> bool: ...
    def relative_to(self, other: PathLike) -> Path: ...
    def with_name(self, name: str) -> Path: ...
    def with_stem(self, stem: str) -> Path: ...
    def with_suffix(self, suffix: str) -> Path: ...
    def joinpath(self, *args: str | PathLike) -> Path: ...

    # Additional properties
    @property
    def anchor(self) -> str: ...
    @property
    def suffixes(self) -> list[str]: ...

    # Path operations
    def __truediv__(self, other: str | PathLike) -> Path: ...
    def __str__(self) -> str: ...
    def __repr__(self) -> str: ...

Path Type Definitions

Type aliases and classes for path-like objects.

# Annotation alias for "anything accepted as a path": a plain string, a Path
# instance, or any os.PathLike object.
PathLike = Union[str, Path, os.PathLike]
# Runtime counterpart of the alias above: a tuple of classes, which is the
# form isinstance() accepts (a typing.Union cannot be used there portably).
PathLikeCls = (str, os.PathLike)  # Used for isinstance checks

@dataclasses.dataclass
class StatResult:
    """File metadata information.

    The first three fields are required; ``owner``, ``group`` and ``mode``
    default to ``None`` and may therefore be absent.
    """
    is_directory: bool  # True when the entry is a directory
    length: int  # presumably the size in bytes -- TODO confirm
    mtime: int  # presumably last-modification time; units not shown here -- TODO confirm
    owner: Optional[str] = None  # optional; None when not provided
    group: Optional[str] = None  # optional; None when not provided
    mode: Optional[int] = None  # optional; presumably permission bits -- TODO confirm

Path Registration

Register custom path classes for different storage backends.

def register_path_cls(cls: type[Path]) -> None:
    """
    Register a custom path class for specific protocols.

    The stub shows no body; how a class is associated with a protocol
    (for example a URI prefix) is not visible here.

    Args:
        cls: Path class to register. Annotated as ``type[Path]``, i.e. the
            class object itself, not an instance.
    """

Resource Utilities

Access package resources and convert paths for writing.

def resource_path(package: Union[str, types.ModuleType]) -> Path:
    """
    Get path to the root directory of a Python package.

    The result is a ``Path``, so it can be extended with ``/`` and read
    with the usual methods (e.g. ``read_text``).

    Args:
        package: Python package name (``str``) or an already-imported
            module object
        
    Returns:
        Path to the package root directory
    """

def to_write_path(path: PathLike) -> Path:
    """
    Convert path to a writable path format.

    NOTE(review): presumably intended for paths that are read-only as
    obtained (such as package resources) -- confirm against the
    implementation.

    Args:
        path: Input path (any ``PathLike`` value)
        
    Returns:
        Path suitable for writing operations
    """

Absl Flags Integration

Define path flags for command-line applications.

def DEFINE_path(
    name: str,
    default: str | None,
    help: str,
    **kwargs
) -> None:
    """
    Define a path flag for Absl applications.

    NOTE(review): absl ``DEFINE_*`` helpers normally return a flag holder;
    the ``-> None`` annotation here should be confirmed against the real
    implementation.
    
    Args:
        name: Flag name
        default: Default path value, or ``None`` for no default
        help: Help text
        **kwargs: Additional keyword arguments; presumably forwarded to the
            underlying absl flag definition -- TODO confirm
    """

Testing Utilities

Testing utilities for path operations.

# Bare annotation: declares that a name `testing` of module type exists,
# without assigning it here. Its contents are not listed on this page.
testing: ModuleType  # Testing utilities module

Usage Examples

Basic File Operations

from etils import epath

# Local file operations: write a string to the file, then read it back
local_path = epath.Path('/tmp/data.txt')
local_path.write_text('Hello, world!')
content = local_path.read_text()

# Cloud storage operations: the same calls, only the gs:// prefix differs
cloud_path = epath.Path('gs://my-bucket/data.txt')
cloud_path.write_text('Cloud data')
exists = cloud_path.exists()

Directory Operations

from etils import epath

# Create directories
# (parents / exist_ok are passed by keyword, which is independent of the
# parameter order in the mkdir signature)
data_dir = epath.Path('gs://my-bucket/data')
data_dir.mkdir(parents=True, exist_ok=True)

# List files
# (glob yields Path objects matching the pattern)
for file_path in data_dir.glob('*.txt'):
    print(f"Found: {file_path}")

# Recursive search
# (rglob takes the same kind of pattern as glob)
for py_file in data_dir.rglob('*.py'):
    print(f"Python file: {py_file}")

Path Manipulation

from etils import epath

# Path construction
# (the `/` operator appends one segment per use and returns a new Path)
base = epath.Path('gs://my-bucket')
data_path = base / 'experiments' / 'run_001' / 'results.json'

# Path properties
# (the trailing comments show the expected printed value)
print(f"Name: {data_path.name}")           # results.json
print(f"Stem: {data_path.stem}")           # results
print(f"Suffix: {data_path.suffix}")       # .json
print(f"Parent: {data_path.parent}")       # gs://my-bucket/experiments/run_001

Resource Access

from etils import epath

# Access package resources
# ('my_package' is a placeholder package name; the right-hand side of `/`
# may itself contain several segments)
package_root = epath.resource_path('my_package')
config_path = package_root / 'config/settings.json'
config_data = config_path.read_text()

# Prepare paths for writing
# (to_write_path returns a Path, which is then written to as usual)
output_path = epath.to_write_path('gs://results/output.txt')
output_path.write_text('Results data')

Install with Tessl CLI

npx tessl i tessl/pypi-etils

docs

application-framework.md

array-types.md

colab-integration.md

dataclass-enhancements.md

index.md

numpy-utilities.md

path-operations.md

python-utilities.md

tree-manipulation.md

tile.json