or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced-compression.md · advanced-decompression.md · buffer-operations.md · dictionary-compression.md · frame-analysis.md · index.md · simple-operations.md

docs/advanced-compression.md

# Advanced Compression

Sophisticated compression capabilities including customizable parameters, streaming interfaces, dictionary support, and multi-threading for high-performance applications.

## Capabilities

### ZstdCompressor

Main compression class providing full control over compression parameters and advanced compression modes.

```python { .api }
class ZstdCompressor:
    def __init__(
        self,
        level: int = 3,
        dict_data: ZstdCompressionDict = None,
        compression_params: ZstdCompressionParameters = None,
        write_checksum: bool = None,
        write_content_size: bool = None,
        write_dict_id: bool = None,
        threads: int = 0
    ):
        """
        Create a compression context.

        Parameters:
        - level: int, compression level (1-22, default 3)
        - dict_data: ZstdCompressionDict, compression dictionary
        - compression_params: ZstdCompressionParameters, detailed parameters
        - write_checksum: bool, include integrity checksum
        - write_content_size: bool, write original size in frame header
        - write_dict_id: bool, write dictionary ID in frame header
        - threads: int, number of threads for compression (0 = auto)
        """

    def memory_size(self) -> int:
        """Get memory usage of compression context in bytes."""

    def compress(self, data: bytes) -> bytes:
        """
        Compress data in one operation.

        Parameters:
        - data: bytes-like object to compress

        Returns:
        bytes: Compressed data
        """

    def compressobj(self, size: int = -1) -> ZstdCompressionObj:
        """
        Create a compression object for streaming operations.

        Parameters:
        - size: int, hint about total size of data to compress

        Returns:
        ZstdCompressionObj: Streaming compression object
        """

    def chunker(self, size: int = -1, chunk_size: int = -1) -> ZstdCompressionChunker:
        """
        Create a compression chunker for processing data in chunks.

        Parameters:
        - size: int, hint about total size of data
        - chunk_size: int, preferred chunk size

        Returns:
        ZstdCompressionChunker: Chunking compression interface
        """
```

**Usage Example:**

```python
import zstandard as zstd

# Basic compressor
compressor = zstd.ZstdCompressor(level=10)
compressed = compressor.compress(b"Data to compress")

# High-performance compressor with threading
compressor = zstd.ZstdCompressor(level=3, threads=4)
compressed = compressor.compress(large_data)

# Memory usage monitoring
print(f"Compressor memory usage: {compressor.memory_size()} bytes")
```

### Streaming Compression

Stream-based compression for handling large data without loading everything into memory.

```python { .api }
class ZstdCompressor:
    def stream_writer(
        self,
        writer,
        size: int = -1,
        write_size: int = -1,
        write_return_read: bool = False,
        closefd: bool = True
    ) -> ZstdCompressionWriter:
        """
        Create a streaming compression writer.

        Parameters:
        - writer: file-like object to write compressed data to
        - size: int, hint about total size of data
        - write_size: int, preferred write size
        - write_return_read: bool, return read count instead of write count
        - closefd: bool, whether to close writer when done

        Returns:
        ZstdCompressionWriter: Streaming compression writer
        """

    def stream_reader(
        self,
        source,
        size: int = -1,
        read_size: int = -1,
        closefd: bool = True
    ) -> ZstdCompressionReader:
        """
        Create a streaming compression reader.

        Parameters:
        - source: file-like object or bytes to read from
        - size: int, hint about total size of data
        - read_size: int, preferred read size
        - closefd: bool, whether to close source when done

        Returns:
        ZstdCompressionReader: Streaming compression reader
        """

    def copy_stream(
        self,
        ifh,
        ofh,
        size: int = -1,
        read_size: int = -1,
        write_size: int = -1
    ) -> tuple[int, int]:
        """
        Copy and compress data between streams.

        Parameters:
        - ifh: input file-like object
        - ofh: output file-like object
        - size: int, hint about total size
        - read_size: int, read buffer size
        - write_size: int, write buffer size

        Returns:
        tuple[int, int]: (bytes_read, bytes_written)
        """
```

**Usage Examples:**

```python
import zstandard as zstd
import io

compressor = zstd.ZstdCompressor(level=5)

# Stream writer - compress data as you write
output = io.BytesIO()
with compressor.stream_writer(output) as writer:
    writer.write(b"First chunk of data")
    writer.write(b"Second chunk of data")
    writer.write(b"Final chunk")

compressed_data = output.getvalue()

# Stream reader - compress data as you read
data = b"Large amount of data to compress"
reader = compressor.stream_reader(io.BytesIO(data))
compressed_chunks = []
while True:
    chunk = reader.read(8192)
    if not chunk:
        break
    compressed_chunks.append(chunk)

# Copy between streams with compression
with open('input.txt', 'rb') as input_file, \
        open('output.zst', 'wb') as output_file:
    bytes_read, bytes_written = compressor.copy_stream(input_file, output_file)
    print(f"Read {bytes_read} bytes, wrote {bytes_written} bytes")
```

### Iterative Compression

Compress data in chunks and yield compressed output incrementally, useful for processing large data streams.

```python { .api }
class ZstdCompressor:
    def read_to_iter(
        self,
        reader,
        size: int = -1,
        read_size: int = -1,
        write_size: int = -1
    ) -> Generator[bytes, None, None]:
        """
        Compress data from reader and yield compressed chunks.

        Parameters:
        - reader: file-like object or bytes to read from
        - size: int, hint about total size of data
        - read_size: int, read buffer size
        - write_size: int, write buffer size

        Yields:
        bytes: Compressed data chunks
        """
```

**Usage Example:**

```python
import zstandard as zstd
import io

compressor = zstd.ZstdCompressor(level=5)

# Process large data iteratively
large_data = b"Very large data content that needs streaming compression..."
reader = io.BytesIO(large_data)

# Compress and process chunks as they're produced
compressed_chunks = []
for chunk in compressor.read_to_iter(reader):
    compressed_chunks.append(chunk)
    # Process each chunk immediately to save memory
    process_compressed_chunk(chunk)

# Combine all chunks if needed
final_compressed = b''.join(compressed_chunks)
```

### Multi-Threading Compression

Parallel compression for improved performance on multi-core systems.

```python { .api }
class ZstdCompressor:
    def multi_compress_to_buffer(
        self,
        data,
        threads: int = 0
    ) -> BufferWithSegmentsCollection:
        """
        Compress multiple data items in parallel.

        Parameters:
        - data: BufferWithSegments, BufferWithSegmentsCollection, or list of bytes
        - threads: int, number of threads (0 = auto, -1 = no threading)

        Returns:
        BufferWithSegmentsCollection: Collection of compressed segments
        """
```

**Usage Example:**

```python
import zstandard as zstd

compressor = zstd.ZstdCompressor(level=3)

# Prepare multiple data items
data_items = [
    b"First piece of data to compress",
    b"Second piece of data to compress",
    b"Third piece of data to compress"
]

# Compress in parallel
result = compressor.multi_compress_to_buffer(data_items, threads=4)

# Access compressed segments
for i in range(len(result)):
    segment = result[i]
    print(f"Segment {i}: {len(segment)} bytes")
    compressed_data = segment.tobytes()
```

### Frame Progression Monitoring

Monitor compression progress and statistics during multi-threaded operations.

```python { .api }
class ZstdCompressor:
    def frame_progression(self) -> tuple[int, int, int]:
        """
        Get compression progress information.

        Returns:
        tuple[int, int, int]: (bytes_read, bytes_written, bytes_flushed)
        """
```

**Usage Example:**

```python
import zstandard as zstd

compressor = zstd.ZstdCompressor(level=5, threads=4)

# Start compression
data = b"Large data to monitor compression progress"
compressed = compressor.compress(data)

# Get progression statistics
bytes_read, bytes_written, bytes_flushed = compressor.frame_progression()
print(f"Read: {bytes_read}, Written: {bytes_written}, Flushed: {bytes_flushed}")
```

### Compression Parameters

Fine-grained control over compression behavior through detailed parameter configuration.

```python { .api }
class ZstdCompressionParameters:
    def __init__(
        self,
        format: int = FORMAT_ZSTD1,
        compression_level: int = 3,
        window_log: int = 0,
        hash_log: int = 0,
        chain_log: int = 0,
        search_log: int = 0,
        min_match: int = 0,
        target_length: int = 0,
        strategy: int = 0,
        write_content_size: int = -1,
        write_checksum: int = -1,
        write_dict_id: int = -1,
        job_size: int = 0,
        overlap_log: int = 0,
        force_max_window: int = 0,
        enable_ldm: int = 0,
        ldm_hash_log: int = 0,
        ldm_min_match: int = 0,
        ldm_bucket_size_log: int = 0,
        ldm_hash_rate_log: int = 0,
        threads: int = 0
    ):
        """
        Create detailed compression parameters.

        Parameters:
        - format: int, compression format (FORMAT_ZSTD1, FORMAT_ZSTD1_MAGICLESS)
        - compression_level: int, compression level (1-22)
        - window_log: int, window size as power of 2 (10-31)
        - hash_log: int, hash table size as power of 2 (6-26)
        - chain_log: int, chain table size as power of 2 (6-28)
        - search_log: int, search length as power of 2 (1-26)
        - min_match: int, minimum match length (3-7)
        - target_length: int, target match length (0-999)
        - strategy: int, compression strategy (STRATEGY_*)
        - write_content_size: int, write content size (-1=auto, 0=no, 1=yes)
        - write_checksum: int, write checksum (-1=auto, 0=no, 1=yes)
        - write_dict_id: int, write dictionary ID (-1=auto, 0=no, 1=yes)
        - job_size: int, job size for threading
        - overlap_log: int, overlap size as power of 2
        - force_max_window: int, force maximum window size
        - enable_ldm: int, enable long distance matching
        - ldm_hash_log: int, LDM hash table size as power of 2
        - ldm_min_match: int, LDM minimum match length
        - ldm_bucket_size_log: int, LDM bucket size as power of 2
        - ldm_hash_rate_log: int, LDM hash rate as power of 2
        - threads: int, number of threads
        """

    @staticmethod
    def from_level(
        level: int,
        source_size: int = 0,
        dict_size: int = 0,
        **kwargs
    ) -> ZstdCompressionParameters:
        """
        Create parameters from compression level with optional hints.

        Parameters:
        - level: int, compression level (1-22)
        - source_size: int, hint about source data size
        - dict_size: int, dictionary size if using dictionary
        - **kwargs: additional parameter overrides

        Returns:
        ZstdCompressionParameters: Configured parameters
        """

    def estimated_compression_context_size(self) -> int:
        """Estimate memory usage for these parameters in bytes."""

class CompressionParameters(ZstdCompressionParameters):
    """Compatibility alias for ZstdCompressionParameters."""
```

**Usage Example:**

```python
import zstandard as zstd

# Create parameters from level with custom tweaks
params = zstd.ZstdCompressionParameters.from_level(
    level=9,
    source_size=1024*1024,  # 1MB hint
    strategy=zstd.STRATEGY_BTULTRA2,
    enable_ldm=1
)

# Use custom parameters
compressor = zstd.ZstdCompressor(compression_params=params)
compressed = compressor.compress(data)

# Check memory usage
memory_usage = params.estimated_compression_context_size()
print(f"Estimated memory usage: {memory_usage} bytes")
```

### Streaming Objects

Low-level streaming compression objects for fine-grained control over compression process.

```python { .api }
class ZstdCompressionObj:
    def compress(self, data: bytes) -> bytes:
        """
        Compress data chunk.

        Parameters:
        - data: bytes to compress

        Returns:
        bytes: Compressed data (may be empty)
        """

    def flush(self, flush_mode: int = COMPRESSOBJ_FLUSH_FINISH) -> bytes:
        """
        Flush compression buffer.

        Parameters:
        - flush_mode: int, flush mode (COMPRESSOBJ_FLUSH_FINISH, COMPRESSOBJ_FLUSH_BLOCK)

        Returns:
        bytes: Final compressed data
        """

class ZstdCompressionChunker:
    def compress(self, data: bytes):
        """Compress data and yield chunks."""

    def flush(self):
        """Flush any remaining data."""

    def finish(self):
        """Finish compression and yield final chunks."""
```

**Usage Example:**

```python
import zstandard as zstd

compressor = zstd.ZstdCompressor(level=5)

# Streaming object
obj = compressor.compressobj()
compressed_chunks = []

# Compress data in chunks
compressed_chunks.append(obj.compress(b"First chunk"))
compressed_chunks.append(obj.compress(b"Second chunk"))
compressed_chunks.append(obj.flush())  # Final data

# Chunker interface
chunker = compressor.chunker()
for chunk in chunker.compress(b"Data to compress"):
    process_compressed_chunk(chunk)

for chunk in chunker.finish():
    process_final_chunk(chunk)
```