or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

cli.md core-operations.md exceptions.md index.md io-callbacks.md

io-callbacks.md docs/

0

# I/O System and Callbacks

1

2

py7zr provides a flexible I/O abstraction layer and callback system that enables custom extraction destinations, progress monitoring, and pluggable storage backends. The I/O system supports file-based, memory-based, and custom implementations, while callbacks provide real-time progress reporting during archive operations.

3

4

## Capabilities

5

6

### I/O Abstraction Layer

7

8

Base abstract class defining the I/O interface for archive operations.

9

10

```python { .api }

11

class Py7zIO:

12

"""

13

Abstract base class for py7zr I/O operations.

14

15

Provides interface for reading, writing, and seeking operations

16

used during archive extraction and creation.

17

"""

18

19

def write(self, s):

20

"""

21

Write bytes to the I/O stream.

22

23

Parameters:

24

- s: bytes, data to write

25

26

Returns:

27

int: number of bytes written

28

"""

29

30

def read(self, size=None):

31

"""

32

Read bytes from the I/O stream.

33

34

Parameters:

35

- size: int, number of bytes to read (None for all)

36

37

Returns:

38

bytes: data read from stream

39

"""

40

41

def seek(self, offset, whence=0):

42

"""

43

Change stream position.

44

45

Parameters:

46

- offset: int, offset in bytes

47

- whence: int, reference point (0=start, 1=current, 2=end)

48

49

Returns:

50

int: new absolute position

51

"""

52

53

def flush(self):

54

"""

55

Flush any buffered write data.

56

"""

57

58

def size(self):

59

"""

60

Get total size of the stream.

61

62

Returns:

63

int: stream size in bytes

64

"""

65

```

66

67

### Concrete I/O Implementations

68

69

Ready-to-use I/O implementations for common scenarios.

70

71

```python { .api }

72

class HashIO(Py7zIO):

73

"""

74

I/O wrapper that computes hash while writing.

75

76

Useful for verifying file integrity during extraction.

77

"""

78

def __init__(self, filename): ...

79

80

class Py7zBytesIO(Py7zIO):

81

"""

82

Memory-based I/O with size limits.

83

84

Stores data in memory with optional size constraints.

85

"""

86

def __init__(self, filename, limit=None): ...

87

88

class NullIO(Py7zIO):

89

"""

90

Null device I/O that discards all writes.

91

92

Useful for testing or when only checking archive contents.

93

"""

94

def __init__(self): ...

95

96

class MemIO(Py7zIO):

97

"""

98

Memory-based I/O with factory pattern.

99

100

Combines memory storage with factory-based creation.

101

"""

102

def __init__(self, fname, factory): ...

103

104

class Buffer:

105

"""

106

Utility buffer for byte operations.

107

"""

108

def __init__(self, size=16): ...

109

```

110

111

#### Usage Examples

112

113

```python

114

import py7zr

115

from py7zr import HashIO, Py7zBytesIO, NullIO

116

117

# Extract to memory with size limit

118

class MemoryFactory(py7zr.WriterFactory):

119

def create(self, filename):

120

return Py7zBytesIO(filename, limit=1024*1024) # 1MB limit

121

122

with py7zr.SevenZipFile('archive.7z', 'r') as archive:

123

archive.extractall(factory=MemoryFactory())

124

125

# Extract with hash verification

126

class HashFactory(py7zr.WriterFactory):

127

def create(self, filename):

128

return HashIO(filename)

129

130

with py7zr.SevenZipFile('archive.7z', 'r') as archive:

131

archive.extractall(factory=HashFactory())

132

133

# Test extraction without writing files

134

class TestFactory(py7zr.WriterFactory):

135

def create(self, filename):

136

return NullIO()

137

138

with py7zr.SevenZipFile('archive.7z', 'r') as archive:

139

archive.extractall(factory=TestFactory())

140

```

141

142

### Factory Pattern

143

144

Factory classes for creating I/O instances during archive operations.

145

146

```python { .api }

147

class WriterFactory:

148

"""

149

Abstract factory for creating Py7zIO writers.

150

151

Enables custom I/O backend selection during extraction.

152

"""

153

def create(self, filename):

154

"""

155

Create I/O writer for specified filename.

156

157

Parameters:

158

- filename: str, target filename

159

160

Returns:

161

Py7zIO: I/O instance for the file

162

"""

163

164

class HashIOFactory(WriterFactory):

165

"""Factory for creating HashIO instances."""

166

def create(self, filename): ...

167

168

class BytesIOFactory(WriterFactory):

169

"""Factory for creating BytesIO instances with size limits."""

170

def __init__(self, limit=None): ...

171

def create(self, filename): ...

172

173

class NullIOFactory(WriterFactory):

174

"""Factory for creating NullIO instances."""

175

def create(self, filename): ...

176

```

177

178

#### Custom Factory Example

179

180

```python

181

import py7zr

182

from py7zr import WriterFactory, Py7zIO

183

import os

184

185

class CustomFileFactory(WriterFactory):

186

"""Custom factory that creates files with specific permissions."""

187

188

def __init__(self, base_path, permissions=0o644):

189

self.base_path = base_path

190

self.permissions = permissions

191

192

def create(self, filename):

193

full_path = os.path.join(self.base_path, filename)

194

os.makedirs(os.path.dirname(full_path), exist_ok=True)

195

196

class CustomFileIO(Py7zIO):

197

def __init__(self, path, perms):

198

self.path = path

199

self.perms = perms

200

self.file = open(path, 'wb')

201

202

def write(self, data):

203

return self.file.write(data)

204

205

def close(self):

206

self.file.close()

207

os.chmod(self.path, self.perms)

208

209

return CustomFileIO(full_path, self.permissions)

210

211

# Use custom factory

212

factory = CustomFileFactory('/tmp/extracted', permissions=0o755)

213

with py7zr.SevenZipFile('archive.7z', 'r') as archive:

214

archive.extractall(factory=factory)

215

```

216

217

### Callback System

218

219

Progress reporting and event handling during archive operations.

220

221

```python { .api }

222

class Callback:

223

"""

224

Abstract base class for operation callbacks.

225

226

Provides hooks for monitoring and controlling archive operations.

227

"""

228

229

def report_start_preparation(self):

230

"""

231

Called at the start of operation preparation phase.

232

"""

233

234

def report_start(self, processing_file_path, processing_bytes):

235

"""

236

Called when starting to process a file.

237

238

Parameters:

239

- processing_file_path: str, path of file being processed

240

- processing_bytes: int, total bytes to process

241

"""

242

243

def report_update(self, decompressed_bytes):

244

"""

245

Called periodically during processing with progress info.

246

247

Parameters:

248

- decompressed_bytes: int, bytes decompressed since the previous update (accumulate these to get a running total)

249

"""

250

251

def report_end(self, processing_file_path, wrote_bytes):

252

"""

253

Called when file processing is complete.

254

255

Parameters:

256

- processing_file_path: str, path of processed file

257

- wrote_bytes: int, total bytes written

258

"""

259

260

def report_warning(self, message):

261

"""

262

Called when a warning occurs during processing.

263

264

Parameters:

265

- message: str, warning message

266

"""

267

268

def report_postprocess(self):

269

"""

270

Called during post-processing phase.

271

"""

272

```

273

274

### Concrete Callback Implementations

275

276

Pre-built callback implementations for common use cases.

277

278

```python { .api }

279

class ExtractCallback(Callback):

280

"""

281

Default callback implementation for extraction operations.

282

283

Provides basic progress reporting to stdout.

284

"""

285

286

class ArchiveCallback(Callback):

287

"""

288

Default callback implementation for archive creation operations.

289

290

Provides basic progress reporting to stdout.

291

"""

292

```

293

294

#### Custom Callback Examples

295

296

```python

297

import py7zr

298

from py7zr import Callback

299

300

class ProgressCallback(Callback):

301

"""Custom callback with progress bar."""

302

303

def __init__(self):

304

self.current_file = None

305

self.total_bytes = 0

306

self.processed_bytes = 0

307

308

def report_start_preparation(self):

309

print("Preparing archive operation...")

310

311

def report_start(self, processing_file_path, processing_bytes):

312

self.current_file = processing_file_path

313

self.total_bytes = processing_bytes

314

self.processed_bytes = 0

315

print(f"Processing: {processing_file_path}")

316

317

def report_update(self, decompressed_bytes):

318

self.processed_bytes += decompressed_bytes

319

if self.total_bytes > 0:

320

percent = (self.processed_bytes / self.total_bytes) * 100

321

print(f"Progress: {percent:.1f}% ({self.processed_bytes}/{self.total_bytes} bytes)")

322

323

def report_end(self, processing_file_path, wrote_bytes):

324

print(f"Completed: {processing_file_path} ({wrote_bytes} bytes)")

325

326

def report_warning(self, message):

327

print(f"Warning: {message}")

328

329

# Use custom callback

330

callback = ProgressCallback()

331

with py7zr.SevenZipFile('archive.7z', 'r') as archive:

332

archive.extractall(callback=callback)

333

```

334

335

```python

336

class LoggingCallback(Callback):

337

"""Callback that logs to file."""

338

339

def __init__(self, log_file):

340

self.log_file = log_file

341

342

def report_start(self, processing_file_path, processing_bytes):

343

with open(self.log_file, 'a') as f:

344

f.write(f"START: {processing_file_path} ({processing_bytes} bytes)\n")

345

346

def report_end(self, processing_file_path, wrote_bytes):

347

with open(self.log_file, 'a') as f:

348

f.write(f"END: {processing_file_path} ({wrote_bytes} bytes)\n")

349

350

def report_warning(self, message):

351

with open(self.log_file, 'a') as f:

352

f.write(f"WARNING: {message}\n")

353

354

# Use logging callback

355

callback = LoggingCallback('extraction.log')

356

with py7zr.SevenZipFile('archive.7z', 'r') as archive:

357

archive.extractall(callback=callback)

358

```

359

360

### Advanced I/O Patterns

361

362

Complex usage patterns combining I/O and callbacks.

363

364

#### Streaming Extraction to Network

365

366

```python

367

import py7zr

368

import socket

369

from py7zr import WriterFactory, Py7zIO

370

371

class NetworkIO(Py7zIO):

372

"""Stream extracted files over network."""

373

374

def __init__(self, filename, socket_conn):

375

self.filename = filename

376

self.socket = socket_conn

377

self.bytes_sent = 0

378

379

def write(self, data):

380

# Send filename header first time

381

if self.bytes_sent == 0:

382

header = f"FILE:{self.filename}\n".encode()

383

self.socket.send(header)

384

385

self.socket.send(data)

386

self.bytes_sent += len(data)

387

return len(data)

388

389

class NetworkFactory(WriterFactory):

390

def __init__(self, socket_conn):

391

self.socket = socket_conn

392

393

def create(self, filename):

394

return NetworkIO(filename, self.socket)

395

396

# Extract over network

397

with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:

398

s.connect(('remote_host', 8080))

399

factory = NetworkFactory(s)

400

401

with py7zr.SevenZipFile('archive.7z', 'r') as archive:

402

archive.extractall(factory=factory)

403

```

404

405

#### Conditional Extraction with Callbacks

406

407

```python

408

class SelectiveCallback(Callback):

409

"""Callback that can skip files based on criteria."""

410

411

def __init__(self, max_file_size=1024*1024):

412

self.max_file_size = max_file_size

413

self.skip_current = False

414

415

def report_start(self, processing_file_path, processing_bytes):

416

if processing_bytes > self.max_file_size:

417

print(f"Skipping large file: {processing_file_path} ({processing_bytes} bytes)")

418

self.skip_current = True

419

return False # Skip this file

420

else:

421

self.skip_current = False

422

return True # Process this file

423

424

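# Note: the True/False return values above are illustrative only. Actual file skipping requires integration with the extraction logic (for example, choosing which files to extract via extract(targets=[...])).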

425

```

426

427

## Integration Examples

428

429

### With Progress Bars (tqdm)

430

431

```python

432

import py7zr

433

from py7zr import Callback

434

from tqdm import tqdm

435

436

class TqdmCallback(Callback):

437

def __init__(self):

438

self.pbar = None

439

440

def report_start(self, processing_file_path, processing_bytes):

441

self.pbar = tqdm(total=processing_bytes,

442

desc=f"Extracting {processing_file_path}",

443

unit='B', unit_scale=True)

444

445

def report_update(self, decompressed_bytes):

446

if self.pbar:

447

self.pbar.update(decompressed_bytes)

448

449

def report_end(self, processing_file_path, wrote_bytes):

450

if self.pbar:

451

self.pbar.close()

452

453

# Extract with progress bar

454

callback = TqdmCallback()

455

with py7zr.SevenZipFile('archive.7z', 'r') as archive:

456

archive.extractall(callback=callback)

457

```

458

459

### With Cloud Storage

460

461

```python

462

from io import BytesIO

import py7zr

463

from py7zr import WriterFactory, Py7zIO

464

import boto3

465

466

class S3IO(Py7zIO):

467

"""Upload extracted files directly to S3."""

468

469

def __init__(self, filename, s3_client, bucket, prefix=""):

470

self.filename = filename

471

self.s3_client = s3_client

472

self.bucket = bucket

473

self.key = f"{prefix}/{filename}" if prefix else filename

474

self.buffer = BytesIO()

475

476

def write(self, data):

477

return self.buffer.write(data)

478

479

def close(self):

480

self.buffer.seek(0)

481

self.s3_client.upload_fileobj(self.buffer, self.bucket, self.key)

482

483

class S3Factory(WriterFactory):

484

def __init__(self, bucket, prefix=""):

485

self.s3_client = boto3.client('s3')

486

self.bucket = bucket

487

self.prefix = prefix

488

489

def create(self, filename):

490

return S3IO(filename, self.s3_client, self.bucket, self.prefix)

491

492

# Extract directly to S3

493

factory = S3Factory('my-bucket', 'extracted-files')

494

with py7zr.SevenZipFile('archive.7z', 'r') as archive:

495

archive.extractall(factory=factory)

496

```