Unified pythonic interface for diverse file systems and storage backends
—
Complete filesystem abstraction providing consistent methods for file operations, directory management, and metadata access across all storage backends. AbstractFileSystem serves as the base class for all filesystem implementations and defines the unified interface that enables storage-agnostic code.
Core file access methods that handle opening files and reading/writing data with support for various modes and options.
def open(self, path, mode='rb', **kwargs):
"""
Open a file-like object.
Parameters:
- path: str, file path
- mode: str, opening mode ('r', 'w', 'a', 'rb', 'wb', etc.)
- **kwargs: additional options (block_size, cache_type, etc.)
Returns:
File-like object
"""
def cat_file(self, path, start=None, end=None, **kwargs):
"""
Read file contents as bytes.
Parameters:
- path: str, file path
- start: int, byte offset to start reading
- end: int, byte offset to stop reading
Returns:
bytes, file contents
"""
def pipe_file(self, path, value, **kwargs):
"""
Write bytes to a file.
Parameters:
- path: str, file path
- value: bytes, data to write
"""
def read_text(self, path, encoding=None, **kwargs):
"""
Read file contents as text.
Parameters:
- path: str, file path
- encoding: str, text encoding
Returns:
str, file contents
"""
def write_text(self, path, value, encoding=None, **kwargs):
"""
Write text to a file.
Parameters:
- path: str, file path
- value: str, text to write
- encoding: str, text encoding
"""
Methods for creating, listing, and managing directories across different storage backends.
def ls(self, path, detail=True, **kwargs):
"""
List directory contents.
Parameters:
- path: str, directory path
- detail: bool, return detailed info or just names
Returns:
list, file/directory information
"""
def mkdir(self, path, create_parents=True, **kwargs):
"""
Create a directory.
Parameters:
- path: str, directory path
- create_parents: bool, create parent directories if needed
"""
def makedirs(self, path, exist_ok=False):
"""
Create directories recursively.
Parameters:
- path: str, directory path
- exist_ok: bool, don't raise error if directory exists
"""
def rmdir(self, path):
"""
Remove an empty directory.
Parameters:
- path: str, directory path
"""
Methods for querying metadata, checking existence, and getting file properties.
def exists(self, path, **kwargs):
"""
Check if path exists.
Parameters:
- path: str, file or directory path
Returns:
bool, True if path exists
"""
def isdir(self, path):
"""
Check if path is a directory.
Parameters:
- path: str, path to check
Returns:
bool, True if path is directory
"""
def isfile(self, path):
"""
Check if path is a file.
Parameters:
- path: str, path to check
Returns:
bool, True if path is file
"""
def info(self, path, **kwargs):
"""
Get detailed information about a path.
Parameters:
- path: str, file or directory path
Returns:
dict, file metadata (size, type, mtime, etc.)
"""
def size(self, path):
"""
Get file size in bytes.
Parameters:
- path: str, file path
Returns:
int, file size in bytes
"""
def checksum(self, path):
"""
Get file checksum/hash.
Parameters:
- path: str, file path
Returns:
str, file checksum
"""
def created(self, path):
"""
Get file creation time.
Parameters:
- path: str, file path
Returns:
datetime, creation time
"""
def modified(self, path):
"""
Get file modification time.
Parameters:
- path: str, file path
Returns:
datetime, modification time
"""
Methods for finding files using patterns, globbing, and walking directory trees.
def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
"""
Find files recursively.
Parameters:
- path: str, starting path
- maxdepth: int, maximum recursion depth
- withdirs: bool, include directories in results
- detail: bool, return detailed info or just paths
Returns:
list, found files/directories
"""
def glob(self, path, maxdepth=None, **kwargs):
"""
Find files matching glob pattern.
Parameters:
- path: str, glob pattern
- maxdepth: int, maximum recursion depth
Returns:
list, matching file paths
"""
def walk(self, path, maxdepth=None, topdown=True, **kwargs):
"""
Walk directory tree.
Parameters:
- path: str, starting directory
- maxdepth: int, maximum recursion depth
- topdown: bool, visit directories top-down or bottom-up
Returns:
generator, yields (dirpath, dirnames, filenames) tuples
"""
def du(self, path, total=True, maxdepth=None, **kwargs):
"""
Calculate disk usage.
Parameters:
- path: str, directory path
- total: bool, return total size or per-file breakdown
- maxdepth: int, maximum recursion depth
Returns:
int or dict, total size or size breakdown
"""
Methods for moving, copying, uploading, and downloading files between locations.
def get_file(self, rpath, lpath, **kwargs):
"""
Download a single file to local filesystem.
Parameters:
- rpath: str, remote file path
- lpath: str, local file path
"""
def get(self, rpath, lpath, recursive=False, **kwargs):
"""
Download files/directories to local filesystem.
Parameters:
- rpath: str, remote path
- lpath: str, local path
- recursive: bool, download directories recursively
"""
def put_file(self, lpath, rpath, **kwargs):
"""
Upload a single file from local filesystem.
Parameters:
- lpath: str, local file path
- rpath: str, remote file path
"""
def put(self, lpath, rpath, recursive=False, **kwargs):
"""
Upload files/directories from local filesystem.
Parameters:
- lpath: str, local path
- rpath: str, remote path
- recursive: bool, upload directories recursively
"""
def copy(self, path1, path2, recursive=False, **kwargs):
"""
Copy files/directories within filesystem.
Parameters:
- path1: str, source path
- path2: str, destination path
- recursive: bool, copy directories recursively
"""
def mv(self, path1, path2, recursive=False, **kwargs):
"""
Move/rename files/directories.
Parameters:
- path1: str, source path
- path2: str, destination path
- recursive: bool, move directories recursively
"""
Methods for deleting files and directories with various options for handling recursive deletion.
def rm_file(self, path):
"""
Remove a single file.
Parameters:
- path: str, file path
"""
def rm(self, path, recursive=False, maxdepth=None):
"""
Remove files/directories.
Parameters:
- path: str, path to remove
- recursive: bool, remove directories recursively
- maxdepth: int, maximum recursion depth
"""
Methods for operating on multiple files efficiently with batching and parallel processing.
def cat(self, path, recursive=False, **kwargs):
"""
Read multiple files.
Parameters:
- path: str or list, file path(s) or pattern
- recursive: bool, include files in subdirectories
Returns:
bytes or dict, file contents (single file) or mapping (multiple files)
"""
def pipe(self, path, value=None, **kwargs):
"""
Write to multiple files.
Parameters:
- path: str or dict, file path(s) or path->data mapping
- value: bytes, data to write (if path is str)
"""
def head(self, path, size=1024):
"""
Read beginning of file.
Parameters:
- path: str, file path
- size: int, number of bytes to read
Returns:
bytes, file head content
"""
def tail(self, path, size=1024):
"""
Read end of file.
Parameters:
- path: str, file path
- size: int, number of bytes to read
Returns:
bytes, file tail content
"""
def touch(self, path, truncate=True, **kwargs):
"""
Create empty file or update timestamp.
Parameters:
- path: str, file path
- truncate: bool, truncate file if it exists
"""
Advanced filesystem operations including unique key generation, path expansion, and utility methods.
def ukey(self, path):
"""
Generate unique key for file.
Parameters:
- path: str, file path
Returns:
str, unique key (typically includes size and mtime)
"""
def expand_path(self, path, recursive=False, **kwargs):
"""
Expand path patterns to actual paths.
Parameters:
- path: str, path pattern
- recursive: bool, expand recursively
Returns:
list, expanded paths
"""
Methods for managing filesystem transactions to ensure atomic operations across multiple files.
def start_transaction(self):
"""
Start a filesystem transaction.
Returns:
Transaction, transaction context
"""
def end_transaction(self):
"""End the current transaction."""
Methods for managing filesystem instance caching and performance optimization.
def invalidate_cache(self, path=None):
"""
Clear filesystem cache.
Parameters:
- path: str, specific path to invalidate (None for all)
"""
@classmethod
def clear_instance_cache(cls):
"""Clear all cached filesystem instances."""
Methods for serializing filesystem instances and creating dictionary-like interfaces.
def get_mapper(self, root="", check=False, create=False):
"""
Get dictionary-like interface to filesystem.
Parameters:
- root: str, root path for mapping
- check: bool, check if root exists
- create: bool, create root if it doesn't exist
Returns:
FSMap, dictionary-like interface
"""
def to_json(self, include_password=True):
"""
Serialize filesystem to JSON.
Parameters:
- include_password: bool, include sensitive information
Returns:
str, JSON representation
"""
def to_dict(self, include_password=True):
"""
Serialize filesystem to dictionary.
Parameters:
- include_password: bool, include sensitive information
Returns:
dict, dictionary representation
"""
@classmethod
def from_json(cls, blob):
"""
Deserialize filesystem from JSON.
Parameters:
- blob: str, JSON representation
Returns:
AbstractFileSystem, deserialized instance
"""
@classmethod
def from_dict(cls, dct):
"""
Deserialize filesystem from dictionary.
Parameters:
- dct: dict, dictionary representation
Returns:
AbstractFileSystem, deserialized instance
"""
protocol: str or list
"""Protocol name(s) handled by this filesystem"""
sep: str
"""Path separator (default '/')"""
blocksize: int
"""Default block size for reading operations"""
cachable: bool
"""Whether filesystem instances should be cached"""
transaction: Transaction
"""Current transaction context (if any)"""
# Get filesystem instance
fs = fsspec.filesystem('s3', key='...', secret='...')
# Use filesystem methods directly
files = fs.ls('bucket/path/')
content = fs.cat_file('bucket/path/file.txt')
fs.pipe_file('bucket/path/output.txt', b'data')
# File operations
fs.copy('bucket/source.txt', 'bucket/backup.txt')
fs.rm('bucket/old_file.txt')
# Start transaction for atomic operations
with fs.start_transaction():
    fs.pipe_file('bucket/file1.txt', b'data1')
    fs.pipe_file('bucket/file2.txt', b'data2')
# Both files committed together
class MyFileSystem(fsspec.AbstractFileSystem):
    protocol = 'myfs'
    def _open(self, path, mode='rb', **kwargs):
        # Implement file opening
        pass
    def ls(self, path, detail=True, **kwargs):
        # Implement directory listing
        pass
Install with Tessl CLI
npx tessl i tessl/pypi-fsspec