or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

caching.md, callbacks.md, compression.md, core-operations.md, filesystem-interface.md, index.md, mapping.md, registry.md, utilities.md

docs/mapping.md

0

# Dictionary Mapping Interface

1

2

Key-value store interface that presents filesystem paths as dictionary keys, enabling intuitive data access patterns and integration with mapping-based workflows. The FSMap class implements Python's MutableMapping interface to provide familiar dictionary operations on filesystem data.

3

4

## Capabilities

5

6

### FSMap Class

7

8

Dictionary-like interface to filesystem that maps string keys to file contents as bytes values.

9

10

```python { .api }

11

class FSMap:

12

"""Dictionary-like interface to filesystem paths."""

13

14

def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):

15

"""

16

Initialize FSMap.

17

18

Parameters:

19

- root: str, root path for mapping

20

- fs: AbstractFileSystem, filesystem instance

21

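- check: bool, if True, verify that the root path exists (ValueError if it does not) and is writable (a temporary file is touched and removed)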

22

- create: bool, create root path if it doesn't exist

23

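- missing_exceptions: tuple or None, exception types treated as a missing key and re-raised as KeyError on read; None uses the default (FileNotFoundError, IsADirectoryError, NotADirectoryError)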

24

"""

25

```

26

27

### Dictionary Interface Operations

28

29

Standard dictionary operations implemented through the MutableMapping interface.

30

31

```python { .api }

32

def __getitem__(self, key):

33

"""

34

Get file contents by key.

35

36

Parameters:

37

- key: str, file key (relative to root)

38

39

Returns:

40

bytes, file contents

41

"""

42

43

def __setitem__(self, key, value):

44

"""

45

Set file contents by key.

46

47

Parameters:

48

- key: str, file key (relative to root)

49

- value: bytes, data to write

50

"""

51

52

def __delitem__(self, key):

53

"""

54

Delete file by key.

55

56

Parameters:

57

- key: str, file key (relative to root)

58

"""

59

60

def __iter__(self):

61

"""

62

Iterate over all keys.

63

64

Returns:

65

iterator, file keys

66

"""

67

68

def __len__(self):

69

"""

70

Get number of files.

71

72

Returns:

73

int, number of files in mapping

74

"""

75

```

76

77

### Bulk Operations

78

79

Efficient operations for working with multiple keys simultaneously.

80

81

```python { .api }

82

def getitems(self, keys, on_error='raise'):

83

"""

84

Get multiple items by keys.

85

86

Parameters:

87

- keys: list, file keys to retrieve

88

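- on_error: str, how to handle failed keys: 'raise' (raise the error; missing keys become KeyError), 'omit' (leave failed keys out of the result), or 'return' (include every key, with the exception instance as the value for keys that failed)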

89

90

Returns:

91

dict, mapping of keys to file contents

92

"""

93

94

def setitems(self, d):

95

"""

96

Set multiple items from dictionary.

97

98

Parameters:

99

- d: dict, mapping of keys to data

100

"""

101

102

def delitems(self, keys):

103

"""

104

Delete multiple items by keys.

105

106

Parameters:

107

- keys: list, file keys to delete

108

"""

109

110

def clear(self):

111

"""Remove all files from the mapping."""

112

```

113

114

### Properties and Utilities

115

116

Additional properties and utility methods for working with the mapped filesystem.

117

118

```python { .api }

119

@property

120

def dirfs(self):

121

"""

122

Get DirFileSystem for this mapping.

123

124

Returns:

125

DirFileSystem, filesystem view of the mapping directory

126

"""

127

```

128

129

### Mapper Creation Function

130

131

Convenience function for creating FSMap instances from URLs.

132

133

```python { .api }

134

def get_mapper(url='', check=False, create=False, **kwargs):

135

"""

136

Create a key-value store interface to a filesystem.

137

138

Parameters:

139

- url: str, filesystem URL (default: current directory)

140

- check: bool, check if path exists

141

- create: bool, create path if it doesn't exist

142

- **kwargs: additional options passed to filesystem

143

144

Returns:

145

FSMap, dictionary-like interface

146

"""

147

```

148

149

## Usage Patterns

150

151

### Basic Dictionary Operations

152

153

```python

154

# Create mapper for S3 bucket

155

mapper = fsspec.get_mapper('s3://bucket/data/')

156

157

# Write data like a dictionary

158

mapper['file1.txt'] = b'Hello, world!'

159

mapper['subdir/file2.json'] = b'{"key": "value"}'

160

161

# Read data like a dictionary

162

content = mapper['file1.txt']

163

print(content.decode()) # Hello, world!

164

165

# Check if key exists

166

if 'file1.txt' in mapper:

167

print('File exists')

168

169

# Delete files

170

del mapper['file1.txt']

171

172

# Get all keys

173

keys = list(mapper.keys())

174

```

175

176

### Bulk Operations

177

178

```python

179

# Write multiple files at once

180

data = {

181

'file1.txt': b'Content 1',

182

'file2.txt': b'Content 2',

183

'file3.txt': b'Content 3'

184

}

185

mapper.setitems(data)

186

187

# Read multiple files

188

contents = mapper.getitems(['file1.txt', 'file2.txt'])

189

190

# Handle missing files gracefully

191

contents = mapper.getitems(['file1.txt', 'missing.txt'], on_error='omit')

192

193

# Delete multiple files

194

mapper.delitems(['file1.txt', 'file2.txt'])

195

```

196

197

### Integration with Data Processing

198

199

```python

200

import json

201

import pickle

202

203

# JSON data storage

204

mapper = fsspec.get_mapper('s3://bucket/json-data/')

205

206

# Store JSON data

207

data = {'name': 'example', 'values': [1, 2, 3]}

208

mapper['config.json'] = json.dumps(data).encode()

209

210

# Load JSON data

211

raw_data = mapper['config.json']

212

config = json.loads(raw_data.decode())

213

214

# Binary data storage

215

binary_mapper = fsspec.get_mapper('gcs://bucket/models/')

216

217

# Store pickled model

218

import pickle

219

model = {'weights': [1.0, 2.0, 3.0], 'bias': 0.5}

220

binary_mapper['model.pkl'] = pickle.dumps(model)

221

222

# Load pickled model

223

model_data = binary_mapper['model.pkl']

224

loaded_model = pickle.loads(model_data)

225

```

226

227

### Working with Nested Structures

228

229

```python

230

# Create mapper with nested directory structure

231

mapper = fsspec.get_mapper('local:///data/experiment/')

232

233

# Organize data hierarchically using key paths

234

mapper['inputs/train.csv'] = train_data

235

mapper['inputs/test.csv'] = test_data

236

mapper['models/v1/weights.pkl'] = model_weights

237

mapper['models/v1/config.json'] = model_config

238

mapper['results/metrics.json'] = evaluation_metrics

239

240

# List all keys to see structure

241

for key in mapper:

242

print(key)

243

# inputs/train.csv

244

# inputs/test.csv

245

# models/v1/weights.pkl

246

# models/v1/config.json

247

# results/metrics.json

248

```

249

250

### Error Handling

251

252

```python

253

mapper = fsspec.get_mapper('s3://bucket/data/')

254

255

try:

256

# This will raise KeyError if file doesn't exist

257

content = mapper['nonexistent.txt']

258

except KeyError:

259

print('File not found')

260

261

# Use getitems for graceful handling

262

result = mapper.getitems(['file1.txt', 'missing.txt'], on_error='omit')

263

# Only existing files are returned

264

265

# Check existence before access

266

if 'uncertain_file.txt' in mapper:

267

content = mapper['uncertain_file.txt']

268

```

269

270

### Performance Optimization

271

272

```python

273

# Use bulk operations for better performance

274

keys_to_read = ['file1.txt', 'file2.txt', 'file3.txt']

275

276

# Efficient: single bulk operation

277

contents = mapper.getitems(keys_to_read)

278

279

# Inefficient: multiple individual operations

280

contents = {}

281

for key in keys_to_read:

282

contents[key] = mapper[key]

283

284

# Efficient bulk write

285

data_batch = {

286

f'batch_{i}.txt': f'Data {i}'.encode()

287

for i in range(100)

288

}

289

mapper.setitems(data_batch)

290

```

291

292

### Integration with Zarr and Array Libraries

293

294

```python

295

import zarr

296

297

# Create mapper for Zarr store

298

store = fsspec.get_mapper('s3://bucket/zarr-data.zarr')

299

300

# Create Zarr array using fsspec mapper

301

z = zarr.zeros((1000, 1000), chunks=(100, 100), store=store)

302

303

# Write data to array

304

z[:100, :100] = 1.0

305

306

# The zarr metadata and chunks are stored as files in the mapper

307

print(list(store.keys()))

308

# e.g. ['.zarray', '0.0'] with the Zarr v2 format -- only chunks that have been written are stored

309

```

310

311

### Caching and Local Access

312

313

```python

314

# Create cached mapper for better performance

315

cached_mapper = fsspec.get_mapper(

316

'simplecache::s3://bucket/data/',

317

s3={'key': 'ACCESS_KEY', 'secret': 'SECRET_KEY'},

318

cache_storage='/tmp/fsspec-cache'

319

)

320

321

# First access downloads and caches

322

data = cached_mapper['large_file.dat']

323

324

# Subsequent access reads from local cache

325

data = cached_mapper['large_file.dat'] # Much faster

326

```