Collection of common Python utilities for machine learning and scientific computing workflows.
—
Pathlib-compatible API that extends standard file operations to cloud storage systems including Google Cloud Storage (gs://), AWS S3 (s3://), and other remote filesystems. Built on top of Python's pathlib with seamless cloud integration.
The main Path class provides a unified interface for local and cloud file operations.
class Path:
    """
    Pathlib-compatible path for local and cloud storage.
    Supports local paths, gs://, s3://, and other cloud storage systems.

    This is an API signature listing: every method body is a placeholder
    (``...``), so only names, parameters, defaults and annotations are
    meaningful here.
    """

    def __init__(self, path: str | PathLike) -> None: ...

    # File content operations
    def read_text(self, encoding: str = 'utf-8') -> str: ...
    def write_text(
        self,
        data: str,
        encoding: str | None = None,
        errors: str | None = None
    ) -> int: ...
    def read_bytes(self) -> bytes: ...
    def write_bytes(self, data: bytes) -> int: ...

    # File system operations
    def exists(self) -> bool: ...
    def is_file(self) -> bool: ...
    def is_dir(self) -> bool: ...
    # NOTE: `mkdir` used to be listed twice (once without `mode`, once with
    # it). In a class body the later definition replaces the earlier one, so
    # only the signature with `mode` is kept; it is declared exactly once.
    def mkdir(self, mode: Optional[int] = None, parents: bool = False, exist_ok: bool = False) -> None: ...
    def rmdir(self) -> None: ...
    def unlink(self, missing_ok: bool = False) -> None: ...

    # Path navigation
    def glob(self, pattern: str) -> Iterator[Path]: ...
    def rglob(self, pattern: str) -> Iterator[Path]: ...
    def iterdir(self) -> Iterator[Path]: ...

    # Path properties
    @property
    def parent(self) -> Path: ...
    @property
    def name(self) -> str: ...
    @property
    def stem(self) -> str: ...
    @property
    def suffix(self) -> str: ...
    @property
    def parts(self) -> tuple[str, ...]: ...

    # File and directory operations
    def rmtree(self, missing_ok: bool = False) -> None: ...
    def touch(self, mode: int = 0o666, exist_ok: bool = True) -> None: ...
    def rename(self, target: PathLike) -> Path: ...
    def replace(self, target: PathLike) -> Path: ...
    def copy(self, dst: PathLike, overwrite: bool = False) -> Path: ...
    def stat(self) -> StatResult: ...
    def walk(self, *, top_down: bool = True, on_error: Optional[Callable[[OSError], Any]] = None) -> Iterator[tuple[Path, list[str], list[str]]]: ...

    # Advanced path operations
    def open(self, mode: str = 'r', encoding: Optional[str] = None, errors: Optional[str] = None, **kwargs) -> Any: ...
    def expanduser(self) -> Path: ...
    def resolve(self, strict: bool = False) -> Path: ...
    def format(self, *args, **kwargs) -> Path: ...

    # Inherited pathlib methods
    def is_absolute(self) -> bool: ...
    def is_reserved(self) -> bool: ...
    def as_posix(self) -> str: ...
    def as_uri(self) -> str: ...
    def match(self, pattern: str) -> bool: ...
    def relative_to(self, other: PathLike) -> Path: ...
    def with_name(self, name: str) -> Path: ...
    def with_stem(self, stem: str) -> Path: ...
    def with_suffix(self, suffix: str) -> Path: ...
    def joinpath(self, *args: str | PathLike) -> Path: ...

    # Additional properties
    @property
    def anchor(self) -> str: ...
    @property
    def suffixes(self) -> list[str]: ...

    # Path operations
    def __truediv__(self, other: str | PathLike) -> Path: ...
    def __str__(self) -> str: ...
    def __repr__(self) -> str: ...


# Type aliases and classes for path-like objects.
# Annotation alias: a plain string, a Path, or any os.PathLike object.
PathLike = Union[str, Path, os.PathLike]
# Tuple form of the alias, minus Path itself.
PathLikeCls = (str, os.PathLike) # Used for isinstance checks
@dataclasses.dataclass
class StatResult:
    """File metadata information."""

    # True when the entry is a directory.
    is_directory: bool
    # presumably the size in bytes -- TODO confirm the unit
    length: int
    # presumably the last-modification timestamp -- TODO confirm the unit
    mtime: int
    # The three fields below are optional and default to None.
    owner: Optional[str] = None
    group: Optional[str] = None
    mode: Optional[int] = None


# Register custom path classes for different storage backends.
def register_path_cls(cls: type[Path]) -> None:
    """
    Register a custom path class for specific protocols.

    This is a signature stub: the body consists of the docstring only.

    Args:
        cls: Path class to register
    """


# Access package resources and convert paths for writing.
def resource_path(package: Union[str, types.ModuleType]) -> Path:
    """
    Get path to the root directory of a Python package.

    This is a signature stub: the body consists of the docstring only.

    Args:
        package: Python package name or module object

    Returns:
        Path to the package root directory
    """
def to_write_path(path: PathLike) -> Path:
    """
    Convert path to a writable path format.

    This is a signature stub: the body consists of the docstring only.

    Args:
        path: Input path

    Returns:
        Path suitable for writing operations
    """


# Define path flags for command-line applications.
def DEFINE_path(
    name: str,
    default: str | None,
    help: str,
    **kwargs
) -> None:
    """
    Define a path flag for Absl applications.

    This is a signature stub: the body consists of the docstring only.
    Extra keyword arguments are accepted via ``**kwargs``.

    Args:
        name: Flag name
        default: Default path value
        help: Help text
    """


# Testing utilities for path operations.
testing: ModuleType  # Testing utilities module

# Example: the same read/write calls on a local file and on a gs:// object.
from etils import epath

# Local file operations
local_path = epath.Path('/tmp/data.txt')
local_path.write_text('Hello, world!')
content = local_path.read_text()

# Cloud storage operations
cloud_path = epath.Path('gs://my-bucket/data.txt')
cloud_path.write_text('Cloud data')
exists = cloud_path.exists()

# Import for the next example (it was fused onto the line above).
from etils import epath
# Example: create a directory, then list it with glob and rglob.
# Create directories
data_dir = epath.Path('gs://my-bucket/data')
data_dir.mkdir(parents=True, exist_ok=True)

# List files
for file_path in data_dir.glob('*.txt'):
    print(f"Found: {file_path}")

# Recursive search
for py_file in data_dir.rglob('*.py'):
    print(f"Python file: {py_file}")

# Import for the next example (it was fused onto the line above).
from etils import epath
# Example: build a path with the `/` operator and read its components.
# Path construction
base = epath.Path('gs://my-bucket')
data_path = base / 'experiments' / 'run_001' / 'results.json'

# Path properties
print(f"Name: {data_path.name}")  # results.json
print(f"Stem: {data_path.stem}")  # results
print(f"Suffix: {data_path.suffix}")  # .json
print(f"Parent: {data_path.parent}")  # gs://my-bucket/experiments/run_001

# Import for the next example (it was swallowed by the comment above).
from etils import epath
# Example: read a file shipped inside a package, then write to a converted path.
# Access package resources
package_root = epath.resource_path('my_package')
config_path = package_root / 'config/settings.json'
config_data = config_path.read_text()

# Prepare paths for writing
output_path = epath.to_write_path('gs://results/output.txt')
output_path.write_text('Results data')

# Install with Tessl CLI
npx tessl i tessl/pypi-etils