or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced-compression.md · core-utilities.md · index.md · standard-compression.md

docs/advanced-compression.md

0

# Advanced Compression

1

2

This page documents the cramjam modules — Snappy, LZ4, and XZ/LZMA — that provide specialized features beyond the standard compress/decompress operations.

3

4

## Imports

5

6

```python { .api }

7

from cramjam import snappy, lz4, xz

8

```

9

10

## Snappy Module

11

12

The `snappy` module provides fast compression and supports both the framed and the raw Snappy formats.

13

14

### Standard Framed Operations

15

16

```python { .api }

17

def compress(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:

18

"""Snappy compression using framed encoding.

19

20

Args:

21

data: Input data to compress

22

output_len: Optional expected output length

23

24

Returns:

25

Buffer: Compressed data with framing

26

"""

27

28

def decompress(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:

29

"""Snappy decompression using framed encoding.

30

31

Args:

32

data: Framed compressed data to decompress

33

output_len: Optional expected output length

34

35

Returns:

36

Buffer: Decompressed data

37

"""

38

```

39

40

### Raw Format Operations

41

42

```python { .api }

43

def compress_raw(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:

44

"""Snappy compression without framed encoding.

45

46

Args:

47

data: Input data to compress

48

output_len: Optional expected output length

49

50

Returns:

51

Buffer: Raw compressed data (no framing headers)

52

"""

53

54

def decompress_raw(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:

55

"""Snappy decompression without framed encoding.

56

57

Args:

58

data: Raw compressed data to decompress

59

output_len: Optional expected output length

60

61

Returns:

62

Buffer: Decompressed data

63

"""

64

```

65

66

### Direct Buffer Operations

67

68

```python { .api }

69

# Framed format

70

def compress_into(input: BufferProtocol, output: BufferProtocol) -> int:

71

"""Compress into output buffer using framed format."""

72

73

def decompress_into(input: BufferProtocol, output: BufferProtocol) -> int:

74

"""Decompress from framed format into output buffer."""

75

76

# Raw format

77

def compress_raw_into(input: BufferProtocol, output: BufferProtocol) -> int:

78

"""Compress into output buffer using raw format."""

79

80

def decompress_raw_into(input: BufferProtocol, output: BufferProtocol) -> int:

81

"""Decompress from raw format into output buffer."""

82

```

83

84

### Utility Functions

85

86

```python { .api }

87

def compress_raw_max_len(data: BufferProtocol) -> int:

88

"""Get expected max compressed length for snappy raw compression.

89

90

This is the size of buffer that should be passed to compress_raw_into.

91

92

Args:

93

data: Input data to estimate compressed size for

94

95

Returns:

96

int: Maximum possible compressed size

97

"""

98

99

def decompress_raw_len(data: BufferProtocol) -> int:

100

"""Get decompressed length for the given raw compressed data.

101

102

This is the size of buffer that should be passed to decompress_raw_into.

103

104

Args:

105

data: Raw compressed data

106

107

Returns:

108

int: Exact decompressed data size

109

"""

110

```

111

112

### Streaming Classes

113

114

```python { .api }

115

class Compressor:

116

"""Snappy compressor for streaming compression (framed format)."""

117

118

def __init__(self) -> None:

119

"""Initialize streaming compressor."""

120

121

def compress(self, input: bytes) -> int:

122

"""Compress input into the current compressor's stream."""

123

124

def flush(self) -> Buffer:

125

"""Flush and return current compressed stream."""

126

127

def finish(self) -> Buffer:

128

"""Consume compressor state and return final compressed stream."""

129

130

class Decompressor:

131

"""Snappy streaming decompressor."""

132

```

133

134

### Snappy Usage Examples

135

136

```python { .api }

137

import cramjam

138

139

data = b"Snappy compression test" * 1000

140

141

# Framed format (standard, includes headers)

142

framed_compressed = cramjam.snappy.compress(data)

143

framed_decompressed = cramjam.snappy.decompress(framed_compressed)

144

145

# Raw format (no headers, smaller output)

146

raw_compressed = cramjam.snappy.compress_raw(data)

147

raw_decompressed = cramjam.snappy.decompress_raw(raw_compressed)

148

149

# Efficient raw format with pre-calculated sizes

150

max_compressed_size = cramjam.snappy.compress_raw_max_len(data)

151

output_buffer = cramjam.Buffer()

152

output_buffer.set_len(max_compressed_size)

153

actual_size = cramjam.snappy.compress_raw_into(data, output_buffer)

154

155

# Decompress with known size

156

decompressed_size = cramjam.snappy.decompress_raw_len(raw_compressed)

157

decomp_buffer = cramjam.Buffer()

158

decomp_buffer.set_len(decompressed_size)

159

cramjam.snappy.decompress_raw_into(raw_compressed, decomp_buffer)

160

```

161

162

## LZ4 Module

163

164

The `lz4` module provides ultra-fast compression, with frame and block operations and advanced tuning parameters.

165

166

### Standard Frame Operations

167

168

```python { .api }

169

def compress(data: BufferProtocol, level: Optional[int] = None, output_len: Optional[int] = None) -> Buffer:

170

"""LZ4 frame compression.

171

172

Args:

173

data: Input data to compress

174

level: Compression level (optional)

175

output_len: Optional expected output length (currently ignored)

176

177

Note: output_len is ignored; underlying algorithm does not support reading to slice

178

"""

179

180

def decompress(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:

181

"""LZ4 frame decompression.

182

183

Args:

184

data: LZ4 frame compressed data

185

output_len: Optional expected output length (currently ignored)

186

187

Note: output_len is ignored; underlying algorithm does not support reading to slice

188

"""

189

190

def compress_into(input: BufferProtocol, output: BufferProtocol, level: Optional[int] = None) -> int:

191

"""Compress into output buffer using LZ4 frame format."""

192

193

def decompress_into(input: BufferProtocol, output: BufferProtocol) -> int:

194

"""Decompress LZ4 frame into output buffer."""

195

```

196

197

### Block Operations

198

199

```python { .api }

200

def compress_block(data: BufferProtocol, output_len: Optional[int] = None, mode: Optional[str] = None,

201

acceleration: Optional[int] = None, compression: Optional[int] = None,

202

store_size: Optional[bool] = None) -> Buffer:

203

"""LZ4 block compression with advanced parameters.

204

205

Args:

206

data: Input data to compress

207

output_len: Optional expected output length

208

mode: Compression mode (optional)

209

acceleration: Acceleration parameter for faster compression (optional)

210

compression: Compression parameter for better ratio (optional)

211

store_size: Whether to store size in header for decompression (optional)

212

213

Returns:

214

Buffer: Compressed block data

215

"""

216

217

def decompress_block(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:

218

"""LZ4 block decompression.

219

220

Args:

221

data: Compressed block data

222

output_len: Optional upper bound length of decompressed data.

223

If not provided, assumes store_size=True was used during compression

224

225

Returns:

226

Buffer: Decompressed data

227

"""

228

229

def compress_block_into(data: BufferProtocol, output: BufferProtocol, mode: Optional[str] = None,

230

acceleration: Optional[int] = None, store_size: Optional[bool] = None) -> int:

231

"""LZ4 block compression into pre-allocated buffer.

232

233

Args:

234

data: Input data to compress

235

output: Pre-allocated output buffer

236

mode: Compression mode (optional)

237

acceleration: Acceleration parameter (optional)

238

store_size: Whether to store size in header (optional)

239

240

Returns:

241

int: Number of bytes written

242

"""

243

244

def decompress_block_into(input: BufferProtocol, output: BufferProtocol, output_len: Optional[int] = None) -> int:

245

"""LZ4 block decompression into pre-allocated buffer.

246

247

Args:

248

input: Compressed block data

249

output: Pre-allocated output buffer

250

output_len: Optional output length hint

251

252

Returns:

253

int: Number of bytes written

254

"""

255

```

256

257

### Utility Functions

258

259

```python { .api }

260

def compress_block_bound(src: BufferProtocol) -> int:

261

"""Determine guaranteed buffer size for block compression.

262

263

Args:

264

src: Source data to compress

265

266

Returns:

267

int: Buffer size guaranteed to hold compression result

268

269

Raises:

270

Error: If data is too long to be compressed by LZ4

271

"""

272

```

273

274

### Enhanced Streaming Classes

275

276

```python { .api }

277

class Compressor:

278

"""LZ4 streaming compressor with advanced options."""

279

280

def __init__(self, level: Optional[int] = None, content_checksum: Optional[bool] = None,

281

block_linked: Optional[bool] = None) -> None:

282

"""Initialize LZ4 compressor.

283

284

Args:

285

level: Compression level (optional)

286

content_checksum: Enable content checksum (optional)

287

block_linked: Enable block linking for better compression (optional)

288

"""

289

290

def compress(self, input: bytes) -> int:

291

"""Add data to compression stream."""

292

293

def flush(self) -> Buffer:

294

"""Flush and return current compressed stream."""

295

296

def finish(self) -> Buffer:

297

"""Finish compression and return final stream."""

298

299

class Decompressor:

300

"""LZ4 streaming decompressor."""

301

302

def __init__(self, *args, **kwargs) -> None:

303

"""Initialize decompressor with flexible arguments."""

304

305

def decompress(self, data: bytes) -> Buffer:

306

"""Decompress data chunk."""

307

```

308

309

### LZ4 Usage Examples

310

311

```python { .api }

312

import cramjam

313

314

data = b"LZ4 ultra-fast compression" * 2000

315

316

# Standard frame compression

317

compressed = cramjam.lz4.compress(data, level=1) # Fast compression

318

decompressed = cramjam.lz4.decompress(compressed)

319

320

# Block compression with size storage

321

block_compressed = cramjam.lz4.compress_block(data, store_size=True)

322

block_decompressed = cramjam.lz4.decompress_block(block_compressed) # No output_len needed

323

324

# Block compression with acceleration

325

fast_compressed = cramjam.lz4.compress_block(data, acceleration=10, store_size=True)

326

327

# Pre-allocated buffer with bound calculation

328

bound_size = cramjam.lz4.compress_block_bound(data)

329

output = cramjam.Buffer()

330

output.set_len(bound_size)

331

actual_size = cramjam.lz4.compress_block_into(data, output, acceleration=5)

332

333

# Advanced streaming with options

334

compressor = cramjam.lz4.Compressor(level=5, content_checksum=True, block_linked=True)

335

compressor.compress(b"First chunk")

336

compressor.compress(b"Second chunk")

337

result = compressor.finish()

338

```

339

340

## XZ/LZMA Module

341

342

The `xz` module provides high-ratio XZ/LZMA compression with comprehensive configuration options.

343

344

### Enums and Configuration

345

346

```python { .api }

347

# Compression formats

348

class Format(Enum):

349

AUTO = ... # Auto-detect format

350

XZ = ... # XZ format

351

ALONE = ... # Legacy LZMA alone format

352

RAW = ... # Raw LZMA data

353

354

# Checksum types

355

class Check(Enum):

356

NONE = ... # No checksum

357

Crc32 = ... # CRC32 checksum

358

Crc64 = ... # CRC64 checksum

359

Sha256 = ... # SHA256 checksum

360

361

# Available filters

362

class Filter(Enum):

363

Lzma1 = ... # LZMA1 algorithm

364

Lzma2 = ... # LZMA2 algorithm (default)

365

X86 = ... # x86 BCJ filter

366

PowerPC = ... # PowerPC BCJ filter

367

Ia64 = ... # IA-64 BCJ filter

368

Arm = ... # ARM BCJ filter

369

ArmThumb = ... # ARM-Thumb BCJ filter

370

Sparc = ... # SPARC BCJ filter

371

372

# Match finder algorithms

373

class MatchFinder(Enum):

374

HashChain3 = ... # Hash chain with 3-byte hashing

375

HashChain4 = ... # Hash chain with 4-byte hashing

376

BinaryTree2 = ... # Binary tree with 2-byte hashing

377

BinaryTree3 = ... # Binary tree with 3-byte hashing

378

BinaryTree4 = ... # Binary tree with 4-byte hashing

379

380

# Compression modes

381

class Mode(Enum):

382

Fast = ... # Fast compression mode

383

Normal = ... # Normal compression mode

384

```

385

386

### Configuration Classes

387

388

```python { .api }

389

class Options:

390

"""Configuration options for XZ compression."""

391

392

def __init__(self) -> None:

393

"""Initialize options object."""

394

395

def set_preset(self, preset: int) -> Options:

396

"""Set compression preset (0-9).

397

398

Returns: Self for method chaining

399

"""

400

401

def set_dict_size(self, dict_size: int) -> Options:

402

"""Set dictionary size in bytes."""

403

404

def set_lc(self, lc: int) -> Options:

405

"""Set literal context bits (0-4)."""

406

407

def set_lp(self, lp: int) -> Options:

408

"""Set literal position bits (0-4)."""

409

410

def set_pb(self, pb: int) -> Options:

411

"""Set position bits (0-4)."""

412

413

def set_mode(self, mode: Mode) -> Options:

414

"""Set compression mode."""

415

416

def set_nice_len(self, nice_len: int) -> Options:

417

"""Set nice length parameter (3-273)."""

418

419

def set_mf(self, mf: MatchFinder) -> Options:

420

"""Set match finder algorithm."""

421

422

def set_depth(self, depth: int) -> Options:

423

"""Set search depth (0-1000)."""

424

425

class FilterChainItem:

426

"""Individual filter in compression chain."""

427

428

def __init__(self, filter: Filter, options: Optional[Options] = None) -> None:

429

"""Initialize filter chain item.

430

431

Args:

432

filter: Filter type to use

433

options: Optional configuration for this filter

434

"""

435

436

class FilterChain:

437

"""Chain of filters for advanced compression pipeline."""

438

439

def __init__(self) -> None:

440

"""Initialize empty filter chain."""

441

442

def append_filter(self, filter_chain_item: FilterChainItem) -> None:

443

"""Add filter to the chain.

444

445

Args:

446

filter_chain_item: Configured filter to append

447

"""

448

```

449

450

### Compression Functions

451

452

```python { .api }

453

def compress(data: BufferProtocol, preset: Optional[int] = None, format: Optional[Format] = None,

454

check: Optional[Check] = None, filters: Optional[FilterChain] = None,

455

options: Optional[Options] = None, output_len: Optional[int] = None) -> Buffer:

456

"""LZMA compression with comprehensive options.

457

458

Args:

459

data: Input data to compress

460

preset: Compression preset (0-9, default uses library default)

461

format: Compression format (default: XZ)

462

check: Checksum type (default: Crc64 for XZ format)

463

filters: Custom filter chain (optional)

464

options: Fine-grained compression options (optional)

465

output_len: Optional expected output length

466

467

Returns:

468

Buffer: Compressed data

469

"""

470

471

def decompress(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:

472

"""LZMA decompression (auto-detects format).

473

474

Args:

475

data: Compressed data to decompress

476

output_len: Optional expected output length

477

478

Returns:

479

Buffer: Decompressed data

480

"""

481

482

def compress_into(input: BufferProtocol, output: BufferProtocol, preset: Optional[int] = None,

483

format: Optional[Format] = None, check: Optional[Check] = None,

484

filters: Optional[FilterChain] = None, options: Optional[Options] = None) -> int:

485

"""LZMA compression directly into output buffer."""

486

487

def decompress_into(data: BufferProtocol, output: BufferProtocol) -> int:

488

"""LZMA decompression directly into output buffer."""

489

```

490

491

### XZ Usage Examples

492

493

```python { .api }

494

import cramjam

495

496

data = b"XZ compression with advanced options" * 1000

497

498

# Simple compression with preset

499

compressed = cramjam.xz.compress(data, preset=6)

500

decompressed = cramjam.xz.decompress(compressed)

501

502

# Custom format and checksum

503

compressed_custom = cramjam.xz.compress(

504

data,

505

format=cramjam.xz.Format.XZ,

506

check=cramjam.xz.Check.Sha256

507

)

508

509

# Advanced options configuration

510

options = (cramjam.xz.Options()

511

.set_preset(5)

512

.set_dict_size(1024 * 1024) # 1MB dictionary

513

.set_mode(cramjam.xz.Mode.Normal)

514

.set_mf(cramjam.xz.MatchFinder.BinaryTree4)

515

.set_depth(100))

516

517

compressed_advanced = cramjam.xz.compress(data, options=options)

518

519

# Custom filter chain with BCJ filter for x86 binaries

520

filter_chain = cramjam.xz.FilterChain()

521

filter_chain.append_filter(cramjam.xz.FilterChainItem(cramjam.xz.Filter.X86))

522

filter_chain.append_filter(cramjam.xz.FilterChainItem(cramjam.xz.Filter.Lzma2, options))

523

524

compressed_bcj = cramjam.xz.compress(

525

data,

526

filters=filter_chain,

527

format=cramjam.xz.Format.XZ,

528

check=cramjam.xz.Check.Crc64

529

)

530

531

# Legacy LZMA alone format

532

compressed_alone = cramjam.xz.compress(

533

data,

534

format=cramjam.xz.Format.ALONE,

535

preset=9 # Maximum compression

536

)

537

```

538

539

## Advanced Patterns and Best Practices

540

541

### Algorithm Selection Criteria

542

543

```python { .api }

544

# Choose algorithm based on requirements

545

import cramjam

546

547

def compress_data(data, priority='balanced'):

548

"""Compress data based on priority."""

549

550

if priority == 'speed':

551

# Ultra-fast compression

552

return cramjam.lz4.compress(data, level=1)

553

554

elif priority == 'size':

555

# Maximum compression ratio

556

return cramjam.xz.compress(data, preset=9)

557

558

elif priority == 'balanced':

559

# Good speed/size balance

560

return cramjam.zstd.compress(data, level=6)

561

562

elif priority == 'compatibility':

563

# Maximum compatibility

564

return cramjam.gzip.compress(data, level=6)

565

```

566

567

### Memory-Efficient Processing

568

569

```python { .api }

570

import cramjam

571

572

def compress_large_file(input_path, output_path, algorithm='zstd'):

573

"""Compress large file with memory efficiency."""

574

575

# Use streaming for large files

576

if algorithm == 'lz4':

577

compressor = cramjam.lz4.Compressor(

578

level=5,

579

content_checksum=True,

580

block_linked=True

581

)

582

elif algorithm == 'zstd':

583

compressor = cramjam.zstd.Compressor(level=6)

584

else:

585

compressor = cramjam.gzip.Compressor(level=6)

586

587

with open(input_path, 'rb') as infile, open(output_path, 'wb') as outfile:

588

while chunk := infile.read(1024 * 1024): # 1MB chunks

589

compressor.compress(chunk)

590

# Write intermediate results to avoid memory buildup

591

compressed_chunk = compressor.flush()

592

if compressed_chunk:

593

outfile.write(bytes(compressed_chunk))

594

595

# Write final data

596

final_data = compressor.finish()

597

outfile.write(bytes(final_data))

598

```

599

600

### Format-Specific Optimizations

601

602

```python { .api }

603

import cramjam

604

605

# Snappy: Raw format for minimal overhead

606

def fast_compress_raw(data):

607

"""Ultra-fast compression with minimal headers."""

608

return cramjam.snappy.compress_raw(data)

609

610

# LZ4: Block compression with acceleration

611

def compress_with_speed(data, speed_factor=10):

612

"""LZ4 compression optimized for speed."""

613

return cramjam.lz4.compress_block(

614

data,

615

acceleration=speed_factor,

616

store_size=True

617

)

618

619

# XZ: Optimized for executable files

620

def compress_executable(binary_data):

621

"""XZ compression optimized for x86 executables."""

622

options = cramjam.xz.Options().set_preset(6).set_dict_size(2**20)

623

624

filter_chain = cramjam.xz.FilterChain()

625

filter_chain.append_filter(cramjam.xz.FilterChainItem(cramjam.xz.Filter.X86))

626

filter_chain.append_filter(cramjam.xz.FilterChainItem(cramjam.xz.Filter.Lzma2, options))

627

628

return cramjam.xz.compress(

629

binary_data,

630

filters=filter_chain,

631

check=cramjam.xz.Check.Sha256

632

)

633

```