Unified pythonic interface for diverse file systems and storage backends
Quality: Pending — best-practices review has not yet been performed.
Impact: Pending — no eval scenarios have been run.
Key-value store interface that presents filesystem paths as dictionary keys, enabling intuitive data access patterns and integration with mapping-based workflows. The FSMap class implements Python's MutableMapping interface to provide familiar dictionary operations on filesystem data.
Dictionary-like interface to filesystem that maps string keys to file contents as bytes values.
class FSMap:
"""Dictionary-like interface to filesystem paths."""
def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
"""
Initialize FSMap.
Parameters:
- root: str, root path for mapping
- fs: AbstractFileSystem, filesystem instance
- check: bool, check if root path exists
- create: bool, create root path if it doesn't exist
- missing_exceptions: tuple, exceptions to catch for missing files
"""Standard dictionary operations implemented through the MutableMapping interface.
def __getitem__(self, key):
"""
Get file contents by key.
Parameters:
- key: str, file key (relative to root)
Returns:
bytes, file contents
"""
def __setitem__(self, key, value):
"""
Set file contents by key.
Parameters:
- key: str, file key (relative to root)
- value: bytes, data to write
"""
def __delitem__(self, key):
"""
Delete file by key.
Parameters:
- key: str, file key (relative to root)
"""
def __iter__(self):
"""
Iterate over all keys.
Returns:
iterator, file keys
"""
def __len__(self):
"""
Get number of files.
Returns:
int, number of files in mapping
"""Efficient operations for working with multiple keys simultaneously.
def getitems(self, keys, on_error='raise'):
"""
Get multiple items by keys.
Parameters:
- keys: list, file keys to retrieve
- on_error: str, how to handle missing keys ('raise', 'omit', 'return_none')
Returns:
dict, mapping of keys to file contents
"""
def setitems(self, d):
"""
Set multiple items from dictionary.
Parameters:
- d: dict, mapping of keys to data
"""
def delitems(self, keys):
"""
Delete multiple items by keys.
Parameters:
- keys: list, file keys to delete
"""
def clear(self):
"""Remove all files from the mapping."""Additional properties and utility methods for working with the mapped filesystem.
@property
def dirfs(self):
"""
Get DirFileSystem for this mapping.
Returns:
DirFileSystem, filesystem view of the mapping directory
"""Convenience function for creating FSMap instances from URLs.
def get_mapper(url='', check=False, create=False, **kwargs):
"""
Create a key-value store interface to a filesystem.
Parameters:
- url: str, filesystem URL (default: current directory)
- check: bool, check if path exists
- create: bool, create path if it doesn't exist
- **kwargs: additional options passed to filesystem
Returns:
FSMap, dictionary-like interface
"""# Create mapper for S3 bucket
mapper = fsspec.get_mapper('s3://bucket/data/')
# Write data like a dictionary
mapper['file1.txt'] = b'Hello, world!'
mapper['subdir/file2.json'] = b'{"key": "value"}'
# Read data like a dictionary
content = mapper['file1.txt']
print(content.decode()) # Hello, world!
# Check if key exists
if 'file1.txt' in mapper:
print('File exists')
# Delete files
del mapper['file1.txt']
# Get all keys
keys = list(mapper.keys())
# Write multiple files at once
data = {
'file1.txt': b'Content 1',
'file2.txt': b'Content 2',
'file3.txt': b'Content 3'
}
mapper.setitems(data)
# Read multiple files
contents = mapper.getitems(['file1.txt', 'file2.txt'])
# Handle missing files gracefully
contents = mapper.getitems(['file1.txt', 'missing.txt'], on_error='omit')
# Delete multiple files
mapper.delitems(['file1.txt', 'file2.txt'])
import json
import pickle
# JSON data storage
mapper = fsspec.get_mapper('s3://bucket/json-data/')
# Store JSON data
data = {'name': 'example', 'values': [1, 2, 3]}
mapper['config.json'] = json.dumps(data).encode()
# Load JSON data
raw_data = mapper['config.json']
config = json.loads(raw_data.decode())
# Binary data storage
binary_mapper = fsspec.get_mapper('gcs://bucket/models/')
# Store pickled model
import pickle
model = {'weights': [1.0, 2.0, 3.0], 'bias': 0.5}
binary_mapper['model.pkl'] = pickle.dumps(model)
# Load pickled model
model_data = binary_mapper['model.pkl']
loaded_model = pickle.loads(model_data)
# Create mapper with nested directory structure
mapper = fsspec.get_mapper('local:///data/experiment/')
# Organize data hierarchically using key paths
mapper['inputs/train.csv'] = train_data
mapper['inputs/test.csv'] = test_data
mapper['models/v1/weights.pkl'] = model_weights
mapper['models/v1/config.json'] = model_config
mapper['results/metrics.json'] = evaluation_metrics
# List all keys to see structure
for key in mapper:
print(key)
# inputs/train.csv
# inputs/test.csv
# models/v1/weights.pkl
# models/v1/config.json
# results/metrics.json
mapper = fsspec.get_mapper('s3://bucket/data/')
try:
# This will raise KeyError if file doesn't exist
content = mapper['nonexistent.txt']
except KeyError:
print('File not found')
# Use getitems for graceful handling
result = mapper.getitems(['file1.txt', 'missing.txt'], on_error='omit')
# Only existing files are returned
# Check existence before access
if 'uncertain_file.txt' in mapper:
content = mapper['uncertain_file.txt']
# Use bulk operations for better performance
keys_to_read = ['file1.txt', 'file2.txt', 'file3.txt']
# Efficient: single bulk operation
contents = mapper.getitems(keys_to_read)
# Inefficient: multiple individual operations
contents = {}
for key in keys_to_read:
contents[key] = mapper[key]
# Efficient bulk write
data_batch = {
f'batch_{i}.txt': f'Data {i}'.encode()
for i in range(100)
}
mapper.setitems(data_batch)
import zarr
# Create mapper for Zarr store
store = fsspec.get_mapper('s3://bucket/zarr-data.zarr')
# Create Zarr array using fsspec mapper
z = zarr.zeros((1000, 1000), chunks=(100, 100), store=store)
# Write data to array
z[:100, :100] = 1.0
# The zarr metadata and chunks are stored as files in the mapper
print(list(store.keys()))
# ['.zarray', '0.0', '0.1', '1.0', '1.1', ...]
# Create cached mapper for better performance
cached_mapper = fsspec.get_mapper(
'simplecache::s3://bucket/data/',
s3={'key': 'ACCESS_KEY', 'secret': 'SECRET_KEY'},
cache_storage='/tmp/fsspec-cache'
)
# First access downloads and caches
data = cached_mapper['large_file.dat']
# Subsequent access reads from local cache
data = cached_mapper['large_file.dat']  # Much faster (served from local cache)
Install with Tessl CLI
npx tessl i tessl/pypi-fsspec