CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-fsspec

Unified pythonic interface for diverse file systems and storage backends

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/mapping.md

Dictionary Mapping Interface

Key-value store interface that presents filesystem paths as dictionary keys, enabling intuitive data access patterns and integration with mapping-based workflows. The FSMap class implements Python's MutableMapping interface to provide familiar dictionary operations on filesystem data.

Capabilities

FSMap Class

Dictionary-like interface to filesystem that maps string keys to file contents as bytes values.

class FSMap:
    """
    Dictionary-like interface to filesystem paths.

    Keys are strings naming files relative to ``root``; values are the
    contents of those files as bytes.
    """
    
    def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
        """
        Initialize FSMap.
        
        Parameters:
        - root: str, root path for mapping; every key is resolved relative to it
        - fs: AbstractFileSystem, filesystem instance
        - check: bool, check if root path exists
        - create: bool, create root path if it doesn't exist
        - missing_exceptions: tuple, exception types to treat as "missing file";
          reading a key that triggers one of them raises KeyError instead.
          None selects the library's default set
          (NOTE(review): upstream default is FileNotFoundError,
          IsADirectoryError, NotADirectoryError -- confirm for your version)
        """

Dictionary Interface Operations

Standard dictionary operations implemented through the MutableMapping interface.

def __getitem__(self, key):
    """
    Get file contents by key.
    
    Parameters:
    - key: str, file key (relative to root)
    
    Returns:
    bytes, file contents

    Raises:
    KeyError, if no file exists for the key
    """

def __setitem__(self, key, value):
    """
    Set file contents by key.
    
    Parameters:
    - key: str, file key (relative to root); may contain '/' to address a
      nested path, e.g. 'subdir/file2.json'
    - value: bytes, data to write (encode text/JSON before storing)
    """

def __delitem__(self, key):
    """
    Delete file by key.

    This removes the file from the backing filesystem, not just an
    in-memory entry.
    
    Parameters:
    - key: str, file key (relative to root)

    NOTE(review): upstream fsspec raises KeyError when the file cannot be
    removed -- confirm against the installed version.
    """

def __iter__(self):
    """
    Iterate over all keys.

    Keys are given relative to root; files in nested directories appear
    with '/'-separated keys such as 'models/v1/config.json'.
    
    Returns:
    iterator, file keys
    """

def __len__(self):
    """
    Get number of files.

    NOTE(review): presumably this has to list the backing filesystem, so it
    may be slow on large remote stores -- confirm before calling in a loop.
    
    Returns:
    int, number of files in mapping (the number of keys iteration yields)
    """

Bulk Operations

Efficient operations for working with multiple keys simultaneously.

def getitems(self, keys, on_error='raise'):
    """
    Get multiple items by keys.
    
    Parameters:
    - keys: list, file keys to retrieve
    - on_error: str, how to handle keys that cannot be read:
      'raise' (default) raises the error (KeyError for a missing file),
      'omit' leaves the failing key out of the result,
      'return' keeps the key and stores the exception instance as its value
    
    Returns:
    dict, mapping of keys to file contents (bytes)
    """

def setitems(self, d):
    """
    Set multiple items from dictionary.
    
    Parameters:
    - d: dict, mapping of str keys (relative to root) to bytes data

    NOTE(review): upstream fsspec appears to name this parameter
    ``values_dict``; pass it positionally to stay version-independent.
    """

def delitems(self, keys):
    """
    Delete multiple items by keys.

    Bulk counterpart of ``del mapper[key]``; the files are removed from the
    backing filesystem.
    
    Parameters:
    - keys: list, file keys to delete (relative to root)
    """

def clear(self):
    """
    Remove all files from the mapping.

    Destructive: because keys are files, this deletes the stored files
    under the mapping's root on the backing filesystem.
    """

Properties and Utilities

Additional properties and utility methods for working with the mapped filesystem.

@property
def dirfs(self):
    """
    Get DirFileSystem for this mapping.

    Use it when filesystem-style operations are needed on the same
    directory the mapping exposes as keys.
    
    Returns:
    DirFileSystem, filesystem view of the mapping directory
    """

Mapper Creation Function

Convenience function for creating FSMap instances from URLs.

def get_mapper(url='', check=False, create=False, **kwargs):
    """
    Create a key-value store interface to a filesystem.

    The protocol in ``url`` selects the filesystem implementation, e.g.
    ``get_mapper('s3://bucket/data/')``.
    
    Parameters:
    - url: str, filesystem URL (default: current directory)
    - check: bool, check if path exists
    - create: bool, create path if it doesn't exist
    - **kwargs: additional options passed to filesystem
    
    Returns:
    FSMap, dictionary-like interface

    NOTE(review): recent upstream fsspec versions also accept
    ``missing_exceptions`` and ``alternate_root`` keyword arguments --
    confirm against the installed version.
    """

Usage Patterns

Basic Dictionary Operations

import fsspec

# Create mapper for S3 bucket (the s3:// protocol needs the optional s3fs package)
mapper = fsspec.get_mapper('s3://bucket/data/')

# Write data like a dictionary; values must be bytes
mapper['file1.txt'] = b'Hello, world!'
mapper['subdir/file2.json'] = b'{"key": "value"}'

# Read data like a dictionary
content = mapper['file1.txt']
print(content.decode())  # Hello, world!

# Check if key exists
if 'file1.txt' in mapper:
    print('File exists')

# Delete files
del mapper['file1.txt']

# Get all keys (iterating the mapper yields its keys)
keys = list(mapper)

Bulk Operations

import fsspec

# Self-contained setup so the snippet can be run on its own
mapper = fsspec.get_mapper('s3://bucket/data/')

# Write multiple files at once
data = {
    'file1.txt': b'Content 1',
    'file2.txt': b'Content 2',
    'file3.txt': b'Content 3',
}
mapper.setitems(data)

# Read multiple files
contents = mapper.getitems(['file1.txt', 'file2.txt'])

# Handle missing files gracefully: 'omit' drops keys that cannot be read
contents = mapper.getitems(['file1.txt', 'missing.txt'], on_error='omit')

# Delete multiple files
mapper.delitems(['file1.txt', 'file2.txt'])

Integration with Data Processing

import json
import pickle

import fsspec

# JSON data storage
mapper = fsspec.get_mapper('s3://bucket/json-data/')

# Store JSON data (the mapper stores bytes, so encode the JSON text)
data = {'name': 'example', 'values': [1, 2, 3]}
mapper['config.json'] = json.dumps(data).encode()

# Load JSON data
raw_data = mapper['config.json']
config = json.loads(raw_data.decode())

# Binary data storage (the gcs:// protocol needs the optional gcsfs package)
binary_mapper = fsspec.get_mapper('gcs://bucket/models/')

# Store pickled model
model = {'weights': [1.0, 2.0, 3.0], 'bias': 0.5}
binary_mapper['model.pkl'] = pickle.dumps(model)

# Load pickled model
# WARNING: pickle.loads can execute arbitrary code; only unpickle objects
# from storage whose contents you fully trust.
model_data = binary_mapper['model.pkl']
loaded_model = pickle.loads(model_data)

Working with Nested Structures

import fsspec

# Create mapper with nested directory structure
mapper = fsspec.get_mapper('local:///data/experiment/')

# Placeholder payloads so the snippet runs as-is; every value must be bytes
train_data = b'feature,label\n1,0\n'
test_data = b'feature,label\n2,1\n'
model_weights = b'\x00\x01\x02'
model_config = b'{"layers": 2}'
evaluation_metrics = b'{"accuracy": 0.9}'

# Organize data hierarchically using key paths
mapper['inputs/train.csv'] = train_data
mapper['inputs/test.csv'] = test_data
mapper['models/v1/weights.pkl'] = model_weights
mapper['models/v1/config.json'] = model_config
mapper['results/metrics.json'] = evaluation_metrics

# List all keys to see structure
# (order follows the filesystem listing and may differ from insertion order)
for key in mapper:
    print(key)
# inputs/train.csv
# inputs/test.csv
# models/v1/weights.pkl
# models/v1/config.json
# results/metrics.json

Error Handling

import fsspec

mapper = fsspec.get_mapper('s3://bucket/data/')

try:
    # This will raise KeyError if file doesn't exist
    content = mapper['nonexistent.txt']
except KeyError:
    print('File not found')

# Use getitems for graceful handling
result = mapper.getitems(['file1.txt', 'missing.txt'], on_error='omit')
# Only existing files are returned

# Check existence before access
# (the file can still disappear between the check and the read on shared
# storage; wrap the read in try/except KeyError when that matters)
if 'uncertain_file.txt' in mapper:
    content = mapper['uncertain_file.txt']

Performance Optimization

import fsspec

# Self-contained setup so the snippet can be run on its own
mapper = fsspec.get_mapper('s3://bucket/data/')

# Use bulk operations for better performance
keys_to_read = ['file1.txt', 'file2.txt', 'file3.txt']

# Efficient: single bulk operation
contents = mapper.getitems(keys_to_read)

# Inefficient: multiple individual operations (shown for comparison only;
# kept in a separate name so it does not overwrite the bulk result)
contents_one_by_one = {}
for key in keys_to_read:
    contents_one_by_one[key] = mapper[key]

# Efficient bulk write
data_batch = {
    f'batch_{i}.txt': f'Data {i}'.encode()
    for i in range(100)
}
mapper.setitems(data_batch)

Integration with Zarr and Array Libraries

import fsspec
import zarr

# Create mapper for Zarr store
store = fsspec.get_mapper('s3://bucket/zarr-data.zarr')

# Create Zarr array using fsspec mapper
z = zarr.zeros((1000, 1000), chunks=(100, 100), store=store)

# Write data to array (this slice covers exactly one 100x100 chunk)
z[:100, :100] = 1.0

# The zarr metadata and chunks are stored as files in the mapper
print(list(store.keys()))
# With the zarr v2 layout this prints e.g. ['.zarray', '0.0']:
# the array metadata plus one key per chunk that has actually been written.
# Further chunks ('0.1', '1.0', ...) appear as more of the array is filled.

Caching and Local Access

import fsspec

# Create cached mapper for better performance.
# In a chained URL, options keyed by protocol name (s3=...) go to that
# filesystem; the remaining keyword arguments go to the first protocol in
# the chain (simplecache here).
# 'ACCESS_KEY' / 'SECRET_KEY' are placeholders: load real credentials from
# the environment or a credentials file rather than hard-coding them.
cached_mapper = fsspec.get_mapper(
    'simplecache::s3://bucket/data/',
    s3={'key': 'ACCESS_KEY', 'secret': 'SECRET_KEY'},
    cache_storage='/tmp/fsspec-cache'
)

# First access downloads and caches
data = cached_mapper['large_file.dat']

# Subsequent access reads from local cache
data = cached_mapper['large_file.dat']  # Much faster

Install with Tessl CLI

npx tessl i tessl/pypi-fsspec

docs

caching.md

callbacks.md

compression.md

core-operations.md

filesystem-interface.md

index.md

mapping.md

registry.md

utilities.md

tile.json