Unified pythonic interface for diverse file systems and storage backends
npx @tessl/cli install tessl/pypi-fsspec@2025.9.0
A unified, pythonic interface specification for working with diverse file systems and storage backends. fsspec enables developers to write storage-agnostic code that works seamlessly across different backends (local, S3, GCS, Azure, HTTP, SSH, etc.) through a consistent API that abstracts away implementation-specific details.
pip install fsspec
import fsspec
For direct filesystem instantiation:
from fsspec import filesystem
For file opening and URL parsing:
from fsspec import open, open_files, url_to_fs
For mapping interface:
from fsspec import get_mapper, FSMap
import fsspec
# Open a file from any supported backend
with fsspec.open('s3://bucket/file.txt', 'r') as f:
    content = f.read()
# Open local file with same interface
with fsspec.open('/path/to/local/file.txt', 'r') as f:
    content = f.read()
# Get filesystem instance directly
fs = fsspec.filesystem('s3', key='...', secret='...')
files = fs.ls('bucket/path/')
# Dictionary-like interface to filesystem
mapper = fsspec.get_mapper('s3://bucket/path/')
mapper['key'] = b'data'
data = mapper['key']
# Parse URL to get filesystem and path
fs, path = fsspec.url_to_fs('s3://bucket/file.txt')
fsspec uses a plugin-based architecture with consistent interfaces:
This design enables fsspec to serve as the foundation for major data processing libraries including Dask, pandas, xarray, and Hugging Face Datasets, providing unified storage access across the Python data ecosystem.
Essential file and directory operations including opening files, reading/writing data, listing directories, and managing file metadata. These functions provide the primary interface for interacting with files across all supported storage backends.
def open(urlpath, mode='rb', compression=None, encoding='utf8', errors=None, protocol=None, newline=None, expand=None, **kwargs): ...
def open_files(urlpath, mode='rb', compression=None, encoding='utf8', errors=None, name_function=None, num=1, protocol=None, newline=None, auto_mkdir=True, expand=True, **kwargs): ...
def open_local(url, mode='rb', **kwargs): ...
def url_to_fs(url, **kwargs): ...
def get_fs_token_paths(urls, mode='rb', num=1, name_function=None, **kwargs): ...
Plugin system for registering, discovering, and instantiating filesystem implementations. Enables dynamic loading of storage backend drivers and provides centralized access to available protocols.
def filesystem(protocol, **storage_options): ...
def get_filesystem_class(protocol): ...
def register_implementation(name, cls, clobber=False, errtxt=None): ...
def available_protocols(): ...
Complete filesystem abstraction providing consistent methods for file operations, directory management, and metadata access across all storage backends. Serves as the base class for all filesystem implementations.
class AbstractFileSystem:
    def open(self, path, mode='rb', **kwargs): ...
    def ls(self, path, detail=True, **kwargs): ...
    def info(self, path, **kwargs): ...
    def exists(self, path, **kwargs): ...
    def cat_file(self, path, start=None, end=None, **kwargs): ...
    def pipe_file(self, path, value, **kwargs): ...
    def copy(self, path1, path2, recursive=False, **kwargs): ...
    def rm(self, path, recursive=False, maxdepth=None): ...
Key-value store interface that presents filesystem paths as dictionary keys, enabling intuitive data access patterns and integration with mapping-based workflows.
class FSMap:
    def __getitem__(self, key): ...
    def __setitem__(self, key, value): ...
    def __delitem__(self, key): ...
    def getitems(self, keys, on_error='raise'): ...
    def setitems(self, d): ...
def get_mapper(url='', check=False, create=False, **kwargs): ...
Multiple caching strategies for optimizing filesystem access patterns, including memory mapping, block caching, read-ahead caching, and background prefetching for improved performance with remote storage.
class BaseCache: ...
class MMapCache(BaseCache): ...
class ReadAheadCache(BaseCache): ...
class BlockCache(BaseCache): ...
class BytesCache(BaseCache): ...
class BackgroundBlockCache(BaseCache): ...
Extensible callback system for monitoring file transfer progress, supporting both built-in progress indicators and custom callback implementations for long-running operations.
class Callback:
    def __call__(self, size_or_none=None, value_or_none=None): ...
    def set_size(self, size): ...
    def relative_update(self, inc=1): ...
    def branched(self, path_1, path_2, **kwargs): ...
class TqdmCallback(Callback): ...
class DotPrinterCallback(Callback): ...
Automatic compression/decompression support for multiple formats, enabling transparent handling of compressed files across all filesystem backends.
def register_compression(name, callback, extensions, force=False): ...
def available_compressions(): ...
Helper functions for URL parsing, path manipulation, tokenization, and configuration management that support the core filesystem operations.
def infer_storage_options(urlpath, inherit_storage_options=None): ...
def infer_compression(filename): ...
def stringify_path(filepath): ...
def tokenize(*args, **kwargs): ...
def get_protocol(url): ...
class FSTimeoutError(Exception):
    """Timeout in filesystem operations"""
class BlocksizeMismatchError(ValueError):
    """Raised when cached file is reopened with different blocksize than original"""
class OpenFile:
    """File-like object for deferred opening"""
    def __init__(self, fs, path, mode='rb', compression=None, encoding=None, errors=None, newline=None): ...
    def __enter__(self): ...
    def __exit__(self, *args): ...
    def open(self): ...
class Transaction:
    """Filesystem transaction context"""
    def __init__(self, fs, **kwargs): ...
    def __enter__(self): ...
    def __exit__(self, exc_type, exc_val, exc_tb): ...
    def start(self): ...
    def complete(self, commit=True): ...