or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration.md dataset.md distributed.md fileio.md index.md logging.md models.md optimization.md registry.md training.md visualization.md

fileio.md docs/

0

# File I/O and Storage Backends

1

2

This module provides unified file operations across multiple storage backends (local filesystem, HTTP, Petrel, LMDB, and Memcached), with transparent backend switching and format-specific handlers. The same file-handling code can therefore run across different storage environments.

3

4

## Capabilities

5

6

### File Client

7

8

Unified client for file operations across different storage backends with transparent backend switching.

9

10

```python { .api }

11

class FileClient:

12

def __init__(self, backend: str = 'disk', **kwargs):

13

"""

14

Initialize FileClient with specified backend.

15

16

Parameters:

17

- backend: Backend type ('disk', 'petrel', 'memcached', 'lmdb', 'http')

18

- **kwargs: Backend-specific configuration options

19

"""

20

21

def get(self, filepath: str) -> bytes:

22

"""

23

Read file content as bytes.

24

25

Parameters:

26

- filepath: Path to file

27

28

Returns:

29

File content as bytes

30

"""

31

32

def get_text(self, filepath: str, encoding: str = 'utf-8') -> str:

33

"""

34

Read file content as text.

35

36

Parameters:

37

- filepath: Path to file

38

- encoding: Text encoding

39

40

Returns:

41

File content as string

42

"""

43

44

def put(self, obj: bytes, filepath: str):

45

"""

46

Write bytes to file.

47

48

Parameters:

49

- obj: Bytes to write

50

- filepath: Destination file path

51

"""

52

53

def put_text(self, obj: str, filepath: str, encoding: str = 'utf-8'):

54

"""

55

Write text to file.

56

57

Parameters:

58

- obj: Text to write

59

- filepath: Destination file path

60

- encoding: Text encoding

61

"""

62

63

def exists(self, filepath: str) -> bool:

64

"""

65

Check if file exists.

66

67

Parameters:

68

- filepath: File path to check

69

70

Returns:

71

True if file exists, False otherwise

72

"""

73

74

def isdir(self, filepath: str) -> bool:

75

"""

76

Check if path is directory.

77

78

Parameters:

79

- filepath: Path to check

80

81

Returns:

82

True if path is directory, False otherwise

83

"""

84

85

def isfile(self, filepath: str) -> bool:

86

"""

87

Check if path is file.

88

89

Parameters:

90

- filepath: Path to check

91

92

Returns:

93

True if path is file, False otherwise

94

"""

95

96

def list_dir_or_file(self, dir_path: str, list_dir: bool = True, list_file: bool = True, suffix: str = None, recursive: bool = False) -> list:

97

"""

98

List directory contents.

99

100

Parameters:

101

- dir_path: Directory path

102

- list_dir: Whether to list directories

103

- list_file: Whether to list files

104

- suffix: File suffix filter

105

- recursive: Whether to search recursively

106

107

Returns:

108

List of paths

109

"""

110

```

111

112

### Storage Backends

113

114

Backend implementations for each supported storage system. All backends derive from `BaseStorageBackend`.

115

116

```python { .api }

117

class BaseStorageBackend:

118

def get(self, filepath: str) -> bytes: ...

119

def get_text(self, filepath: str, encoding: str = 'utf-8') -> str: ...

120

def put(self, obj: bytes, filepath: str): ...

121

def put_text(self, obj: str, filepath: str, encoding: str = 'utf-8'): ...

122

def exists(self, filepath: str) -> bool: ...

123

def isdir(self, filepath: str) -> bool: ...

124

def isfile(self, filepath: str) -> bool: ...

125

126

class LocalBackend(BaseStorageBackend):

127

def __init__(self): ...

128

129

class HardDiskBackend(BaseStorageBackend):

130

def __init__(self): ...

131

132

class HTTPBackend(BaseStorageBackend):

133

def __init__(self): ...

134

135

class PetrelBackend(BaseStorageBackend):

136

def __init__(self, path_mapping: dict = None, enable_mc: bool = True, conf_path: str = None): ...

137

138

class MemcachedBackend(BaseStorageBackend):

139

def __init__(self, server_list_cfg: str, client_cfg: str, sys_path: str = None): ...

140

141

class LmdbBackend(BaseStorageBackend):

142

def __init__(self, db_path: str, readonly: bool = True, lock: bool = False, readahead: bool = False, **kwargs): ...

143

```

144

145

### High-Level File Operations

146

147

Convenient high-level functions for common file operations with automatic backend selection.

148

149

```python { .api }

150

def load(file: str, file_format: str = None, backend: str = 'disk', **kwargs):

151

"""

152

Load data from file with automatic format detection.

153

154

Parameters:

155

- file: File path or file-like object

156

- file_format: File format ('json', 'yaml', 'pkl')

157

- backend: Storage backend

158

- **kwargs: Additional arguments

159

160

Returns:

161

Loaded data

162

"""

163

164

def dump(obj, file: str = None, file_format: str = None, backend: str = 'disk', **kwargs):

165

"""

166

Dump data to file with automatic format detection.

167

168

Parameters:

169

- obj: Object to dump

170

- file: File path or file-like object

171

- file_format: File format ('json', 'yaml', 'pkl')

172

- backend: Storage backend

173

- **kwargs: Additional arguments

174

175

Returns:

176

Dumped string if file is None

177

"""

178

179

def exists(filepath: str, backend: str = 'disk') -> bool:

180

"""

181

Check if file exists.

182

183

Parameters:

184

- filepath: File path

185

- backend: Storage backend

186

187

Returns:

188

True if file exists

189

"""

190

191

def isdir(filepath: str, backend: str = 'disk') -> bool:

192

"""

193

Check if path is directory.

194

195

Parameters:

196

- filepath: Path to check

197

- backend: Storage backend

198

199

Returns:

200

True if path is directory

201

"""

202

203

def isfile(filepath: str, backend: str = 'disk') -> bool:

204

"""

205

Check if path is file.

206

207

Parameters:

208

- filepath: Path to check

209

- backend: Storage backend

210

211

Returns:

212

True if path is file

213

"""

214

215

def get(filepath: str, backend: str = 'disk') -> bytes:

216

"""

217

Get file content as bytes.

218

219

Parameters:

220

- filepath: File path

221

- backend: Storage backend

222

223

Returns:

224

File content as bytes

225

"""

226

227

def get_text(filepath: str, encoding: str = 'utf-8', backend: str = 'disk') -> str:

228

"""

229

Get file content as text.

230

231

Parameters:

232

- filepath: File path

233

- encoding: Text encoding

234

- backend: Storage backend

235

236

Returns:

237

File content as string

238

"""

239

240

def put(obj: bytes, filepath: str, backend: str = 'disk'):

241

"""

242

Put bytes to file.

243

244

Parameters:

245

- obj: Bytes to write

246

- filepath: Destination path

247

- backend: Storage backend

248

"""

249

250

def put_text(obj: str, filepath: str, encoding: str = 'utf-8', backend: str = 'disk'):

251

"""

252

Put text to file.

253

254

Parameters:

255

- obj: Text to write

256

- filepath: Destination path

257

- encoding: Text encoding

258

- backend: Storage backend

259

"""

260

```

261

262

### File Copy Operations

263

264

Functions for copying files and directories across different backends.

265

266

```python { .api }

267

def copyfile(src: str, dst: str, backend: str = 'disk'):

268

"""

269

Copy file from source to destination.

270

271

Parameters:

272

- src: Source file path

273

- dst: Destination file path

274

- backend: Storage backend

275

"""

276

277

def copyfile_from_local(src: str, dst: str, backend: str = 'disk'):

278

"""

279

Copy file from local to remote backend.

280

281

Parameters:

282

- src: Local source file path

283

- dst: Remote destination path

284

- backend: Remote storage backend

285

"""

286

287

def copyfile_to_local(src: str, dst: str, backend: str = 'disk'):

288

"""

289

Copy file from remote backend to local.

290

291

Parameters:

292

- src: Remote source file path

293

- dst: Local destination path

294

- backend: Remote storage backend

295

"""

296

297

def copytree(src: str, dst: str, backend: str = 'disk'):

298

"""

299

Copy directory tree.

300

301

Parameters:

302

- src: Source directory path

303

- dst: Destination directory path

304

- backend: Storage backend

305

"""

306

```

307

308

### File Format Handlers

309

310

Extensible system for handling different file formats with registration support.

311

312

```python { .api }

313

class BaseFileHandler:

314

def load_from_fileobj(self, file, **kwargs): ...

315

def dump_to_fileobj(self, obj, file, **kwargs): ...

316

def load_from_path(self, filepath: str, **kwargs): ...

317

def dump_to_path(self, obj, filepath: str, **kwargs): ...

318

319

class JsonHandler(BaseFileHandler):

320

def load_from_fileobj(self, file, **kwargs): ...

321

def dump_to_fileobj(self, obj, file, **kwargs): ...

322

323

class PickleHandler(BaseFileHandler):

324

def load_from_fileobj(self, file, **kwargs): ...

325

def dump_to_fileobj(self, obj, file, **kwargs): ...

326

327

class YamlHandler(BaseFileHandler):

328

def load_from_fileobj(self, file, **kwargs): ...

329

def dump_to_fileobj(self, obj, file, **kwargs): ...

330

331

def register_handler(handler: BaseFileHandler, file_formats: list):

332

"""

333

Register file format handler.

334

335

Parameters:

336

- handler: Handler instance

337

- file_formats: List of supported file formats

338

"""

339

340

def register_backend(name: str, backend: BaseStorageBackend = None, force: bool = False, prefixes: str = None):

341

"""

342

Register storage backend.

343

344

Parameters:

345

- name: Backend name

346

- backend: Backend class or instance

347

- force: Whether to override existing backend

348

- prefixes: URL prefixes handled by backend

349

"""

350

```

351

352

### File Parsing Utilities

353

354

Utilities for loading structured data from files.

355

356

```python { .api }

357

def list_from_file(filename: str, prefix: str = '', offset: int = 0, max_num: int = 0, encoding: str = 'utf-8', backend: str = 'disk') -> list:

358

"""

359

Load list from file with each line as an element.

360

361

Parameters:

362

- filename: File path

363

- prefix: Prefix to add to each line

364

- offset: Line offset to start reading

365

- max_num: Maximum number of lines to read (0 or a negative value means no limit)

366

- encoding: Text encoding

367

- backend: Storage backend

368

369

Returns:

370

List of lines

371

"""

372

373

def dict_from_file(filename: str, key_type: type = str, encoding: str = 'utf-8', backend: str = 'disk') -> dict:

374

"""

375

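Load a dictionary from a text file, one entry per line: the first whitespace- or tab-separated column is the key and the remaining columns are the value.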

376

377

Parameters:

378

- filename: File path

379

- key_type: Type to convert keys

380

- encoding: Text encoding

381

- backend: Storage backend

382

383

Returns:

384

Dictionary loaded from file

385

"""

386

```

387

388

## Usage Examples

389

390

### Basic File Operations

391

392

```python

393

from mmengine import fileio

394

395

# Load JSON data

396

data = fileio.load('config.json')

397

398

# Save data as JSON

399

fileio.dump(data, 'output.json')

400

401

# Check if file exists

402

if fileio.exists('data.pkl'):

403

data = fileio.load('data.pkl')

404

405

# Read text file

406

content = fileio.get_text('readme.txt')

407

```

408

409

### Using Different Backends

410

411

```python

412

from mmengine.fileio import FileClient

413

414

# Local filesystem

415

client = FileClient('disk')

416

data = client.get('local_file.txt')

417

418

# HTTP backend

419

client = FileClient('http')

420

content = client.get('https://example.com/data.json')

421

422

# Petrel backend (for cloud storage)

423

client = FileClient('petrel', path_mapping={'s3://bucket': '/path/to/local'})

424

data = client.get('s3://bucket/data.pkl')

425

```

426

427

### Cross-Backend File Copying

428

429

```python

430

from mmengine import fileio

431

432

# Copy from local to remote

433

fileio.copyfile_from_local('local_data.json', 's3://bucket/remote_data.json', backend='petrel')

434

435

# Copy from remote to local

436

fileio.copyfile_to_local('s3://bucket/remote_data.json', 'local_copy.json', backend='petrel')

437

438

# Copy entire directory

439

fileio.copytree('local_dir/', 's3://bucket/remote_dir/', backend='petrel')

440

```