# File I/O and Storage Backends

Unified file operations supporting multiple storage backends including local filesystem, HTTP, Petrel, LMDB, and Memcached with transparent backend switching and format-specific handlers. This system enables seamless file operations across different storage environments.

## Capabilities

### File Client

Unified client for file operations across different storage backends with transparent backend switching.

```python { .api }
class FileClient:
    def __init__(self, backend: str = 'disk', **kwargs):
        """
        Initialize FileClient with specified backend.

        Parameters:
        - backend: Backend type ('disk', 'petrel', 'memcached', 'lmdb', 'http')
        - **kwargs: Backend-specific configuration options
        """

    def get(self, filepath: str) -> bytes:
        """
        Read file content as bytes.

        Parameters:
        - filepath: Path to file

        Returns:
        File content as bytes
        """

    def get_text(self, filepath: str, encoding: str = 'utf-8') -> str:
        """
        Read file content as text.

        Parameters:
        - filepath: Path to file
        - encoding: Text encoding

        Returns:
        File content as string
        """

    def put(self, obj: bytes, filepath: str):
        """
        Write bytes to file.

        Parameters:
        - obj: Bytes to write
        - filepath: Destination file path
        """

    def put_text(self, obj: str, filepath: str, encoding: str = 'utf-8'):
        """
        Write text to file.

        Parameters:
        - obj: Text to write
        - filepath: Destination file path
        - encoding: Text encoding
        """

    def exists(self, filepath: str) -> bool:
        """
        Check if file exists.

        Parameters:
        - filepath: File path to check

        Returns:
        True if file exists, False otherwise
        """

    def isdir(self, filepath: str) -> bool:
        """
        Check if path is directory.

        Parameters:
        - filepath: Path to check

        Returns:
        True if path is directory, False otherwise
        """

    def isfile(self, filepath: str) -> bool:
        """
        Check if path is file.

        Parameters:
        - filepath: Path to check

        Returns:
        True if path is file, False otherwise
        """

    def list_dir_or_file(self, dir_path: str, list_dir: bool = True, list_file: bool = True, suffix: str = None, recursive: bool = False) -> list:
        """
        List directory contents.

        Parameters:
        - dir_path: Directory path
        - list_dir: Whether to list directories
        - list_file: Whether to list files
        - suffix: File suffix filter
        - recursive: Whether to search recursively

        Returns:
        List of paths
        """
```

### Storage Backends

Various storage backend implementations for different storage systems.

```python { .api }
class BaseStorageBackend:
    def get(self, filepath: str) -> bytes: ...
    def get_text(self, filepath: str, encoding: str = 'utf-8') -> str: ...
    def put(self, obj: bytes, filepath: str): ...
    def put_text(self, obj: str, filepath: str, encoding: str = 'utf-8'): ...
    def exists(self, filepath: str) -> bool: ...
    def isdir(self, filepath: str) -> bool: ...
    def isfile(self, filepath: str) -> bool: ...

class LocalBackend(BaseStorageBackend):
    def __init__(self): ...

class HardDiskBackend(BaseStorageBackend):
    def __init__(self): ...

class HTTPBackend(BaseStorageBackend):
    def __init__(self): ...

class PetrelBackend(BaseStorageBackend):
    def __init__(self, path_mapping: dict = None, enable_mc: bool = True, conf_path: str = None): ...

class MemcachedBackend(BaseStorageBackend):
    def __init__(self, server_list_cfg: str, client_cfg: str, sys_path: str = None): ...

class LmdbBackend(BaseStorageBackend):
    def __init__(self, db_path: str, readonly: bool = True, lock: bool = False, readahead: bool = False, **kwargs): ...
```

### High-Level File Operations

Convenient high-level functions for common file operations with automatic backend selection.

```python { .api }
def load(file: str, file_format: str = None, backend: str = 'disk', **kwargs):
    """
    Load data from file with automatic format detection.

    Parameters:
    - file: File path or file-like object
    - file_format: File format ('json', 'yaml', 'pkl')
    - backend: Storage backend
    - **kwargs: Additional arguments

    Returns:
    Loaded data
    """

def dump(obj, file: str = None, file_format: str = None, backend: str = 'disk', **kwargs):
    """
    Dump data to file with automatic format detection.

    Parameters:
    - obj: Object to dump
    - file: File path or file-like object
    - file_format: File format ('json', 'yaml', 'pkl')
    - backend: Storage backend
    - **kwargs: Additional arguments

    Returns:
    Dumped string if file is None
    """

def exists(filepath: str, backend: str = 'disk') -> bool:
    """
    Check if file exists.

    Parameters:
    - filepath: File path
    - backend: Storage backend

    Returns:
    True if file exists
    """

def isdir(filepath: str, backend: str = 'disk') -> bool:
    """
    Check if path is directory.

    Parameters:
    - filepath: Path to check
    - backend: Storage backend

    Returns:
    True if path is directory
    """

def isfile(filepath: str, backend: str = 'disk') -> bool:
    """
    Check if path is file.

    Parameters:
    - filepath: Path to check
    - backend: Storage backend

    Returns:
    True if path is file
    """

def get(filepath: str, backend: str = 'disk') -> bytes:
    """
    Get file content as bytes.

    Parameters:
    - filepath: File path
    - backend: Storage backend

    Returns:
    File content as bytes
    """

def get_text(filepath: str, encoding: str = 'utf-8', backend: str = 'disk') -> str:
    """
    Get file content as text.

    Parameters:
    - filepath: File path
    - encoding: Text encoding
    - backend: Storage backend

    Returns:
    File content as string
    """

def put(obj: bytes, filepath: str, backend: str = 'disk'):
    """
    Put bytes to file.

    Parameters:
    - obj: Bytes to write
    - filepath: Destination path
    - backend: Storage backend
    """

def put_text(obj: str, filepath: str, encoding: str = 'utf-8', backend: str = 'disk'):
    """
    Put text to file.

    Parameters:
    - obj: Text to write
    - filepath: Destination path
    - encoding: Text encoding
    - backend: Storage backend
    """
```

### File Copy Operations

Functions for copying files and directories across different backends.

```python { .api }
def copyfile(src: str, dst: str, backend: str = 'disk'):
    """
    Copy file from source to destination.

    Parameters:
    - src: Source file path
    - dst: Destination file path
    - backend: Storage backend
    """

def copyfile_from_local(src: str, dst: str, backend: str = 'disk'):
    """
    Copy file from local to remote backend.

    Parameters:
    - src: Local source file path
    - dst: Remote destination path
    - backend: Remote storage backend
    """

def copyfile_to_local(src: str, dst: str, backend: str = 'disk'):
    """
    Copy file from remote backend to local.

    Parameters:
    - src: Remote source file path
    - dst: Local destination path
    - backend: Remote storage backend
    """

def copytree(src: str, dst: str, backend: str = 'disk'):
    """
    Copy directory tree.

    Parameters:
    - src: Source directory path
    - dst: Destination directory path
    - backend: Storage backend
    """
```

### File Format Handlers

Extensible system for handling different file formats with registration support.

```python { .api }
class BaseFileHandler:
    def load_from_fileobj(self, file, **kwargs): ...
    def dump_to_fileobj(self, obj, file, **kwargs): ...
    def load_from_path(self, filepath: str, **kwargs): ...
    def dump_to_path(self, obj, filepath: str, **kwargs): ...

class JsonHandler(BaseFileHandler):
    def load_from_fileobj(self, file, **kwargs): ...
    def dump_to_fileobj(self, obj, file, **kwargs): ...

class PickleHandler(BaseFileHandler):
    def load_from_fileobj(self, file, **kwargs): ...
    def dump_to_fileobj(self, obj, file, **kwargs): ...

class YamlHandler(BaseFileHandler):
    def load_from_fileobj(self, file, **kwargs): ...
    def dump_to_fileobj(self, obj, file, **kwargs): ...

def register_handler(handler: BaseFileHandler, file_formats: list):
    """
    Register file format handler.

    Parameters:
    - handler: Handler instance
    - file_formats: List of supported file formats
    """

def register_backend(name: str, backend: BaseStorageBackend = None, force: bool = False, prefixes: str = None):
    """
    Register storage backend.

    Parameters:
    - name: Backend name
    - backend: Backend class or instance
    - force: Whether to override existing backend
    - prefixes: URL prefixes handled by backend
    """
```

### File Parsing Utilities

Utilities for loading structured data from files.

```python { .api }
def list_from_file(filename: str, prefix: str = '', offset: int = 0, max_num: int = 0, encoding: str = 'utf-8', backend: str = 'disk') -> list:
    """
    Load list from file with each line as an element.

    Parameters:
    - filename: File path
    - prefix: Prefix to add to each line
    - offset: Line offset to start reading
    - max_num: Maximum number of lines to read
    - encoding: Text encoding
    - backend: Storage backend

    Returns:
    List of lines
    """

def dict_from_file(filename: str, key_type: type = str, encoding: str = 'utf-8', backend: str = 'disk') -> dict:
    """
    Load dictionary from file.

    Parameters:
    - filename: File path
    - key_type: Type to convert keys
    - encoding: Text encoding
    - backend: Storage backend

    Returns:
    Dictionary loaded from file
    """
```

## Usage Examples

### Basic File Operations

```python
from mmengine import fileio

# Load JSON data
data = fileio.load('config.json')

# Save data as JSON
fileio.dump(data, 'output.json')

# Check if file exists
if fileio.exists('data.pkl'):
    data = fileio.load('data.pkl')

# Read text file
content = fileio.get_text('readme.txt')
```

### Using Different Backends

```python
from mmengine.fileio import FileClient

# Local filesystem
client = FileClient('disk')
data = client.get('local_file.txt')

# HTTP backend
client = FileClient('http')
content = client.get('https://example.com/data.json')

# Petrel backend (for cloud storage)
client = FileClient('petrel', path_mapping={'s3://bucket': '/path/to/local'})
data = client.get('s3://bucket/data.pkl')
```

### Cross-Backend File Copying

```python
from mmengine import fileio

# Copy from local to remote
fileio.copyfile_from_local('local_data.json', 's3://bucket/remote_data.json', backend='petrel')

# Copy from remote to local
fileio.copyfile_to_local('s3://bucket/remote_data.json', 'local_copy.json', backend='petrel')

# Copy entire directory
fileio.copytree('local_dir/', 's3://bucket/remote_dir/', backend='petrel')
```