or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

compression.md · core-io.md · filesystem.md · hashing.md · index.md · utilities.md

compression.mddocs/

0

# Compression and Encoding

1

2

This document covers Okio's data compression capabilities, including GZIP and deflate algorithms with native zlib integration, as well as ZIP file system access.

3

4

## GZIP Compression

5

6

GZIP is a widely used compression format that wraps deflate-compressed data in a small header and a trailer containing a CRC-32 checksum and the uncompressed length.

7

8

### GzipSink

9

10

```kotlin { .api }

11

class GzipSink(sink: Sink) : Sink {

12

val deflater: Deflater

13

14

override fun write(source: Buffer, byteCount: Long)

15

override fun flush()

16

override fun timeout(): Timeout

17

override fun close()

18

}

19

20

// Extension function for easy creation

21

fun Sink.gzip(): GzipSink

22

```

23

24

### GzipSource

25

26

```kotlin { .api }

27

class GzipSource(source: Source) : Source {

28

override fun read(sink: Buffer, byteCount: Long): Long

29

override fun timeout(): Timeout

30

override fun close()

31

}

32

33

// Extension function for easy creation

34

fun Source.gzip(): GzipSource

35

```

36

37

### GZIP Usage Examples

38

39

```kotlin

40

// Round-trip a string through GZIP entirely in memory.
val originalData = "This is some text that will be compressed using GZIP compression algorithm."
val buffer = Buffer()

// Create GZIP compressed data.
// Buffer implements both Source and Sink, so `buffer.gzip()` is an ambiguous call;
// constructing GzipSink directly selects the sink side. GzipSink is a plain Sink,
// so it is wrapped with buffer() to obtain writeUtf8(). Closing the buffered sink
// flushes it and finishes the GZIP stream.
GzipSink(buffer).buffer().use { gzipSink ->
    gzipSink.writeUtf8(originalData)
}

// readByteString() drains the buffer, leaving it empty.
val compressedData = buffer.readByteString()
println("Original size: ${originalData.length}")
println("Compressed size: ${compressedData.size}")
println("Compression ratio: ${compressedData.size.toFloat() / originalData.length}")

// Decompressing GZIP data.
// Same ambiguity as above: GzipSource(...) selects the source side of the Buffer.
val decompressedBuffer = Buffer()
GzipSource(Buffer().write(compressedData)).use { gzipSource ->
    decompressedBuffer.writeAll(gzipSource)
}

val decompressedText = decompressedBuffer.readUtf8()
println("Decompressed: '$decompressedText'")
println("Match: ${originalData == decompressedText}")

63

```

64

65

### File Compression Example

66

67

```kotlin

68

val fs = FileSystem.SYSTEM
val sourceFile = "/tmp/large-file.txt".toPath()
val compressedFile = "/tmp/large-file.txt.gz".toPath()

// Create a large text file
fs.write(sourceFile) {
    repeat(1000) { i ->
        writeUtf8("Line $i: This is some repeated text to demonstrate compression.\n")
    }
}

// Compress file using GZIP
fs.write(compressedFile) {
    // GzipSink is a plain Sink; buffer() adds writeAll().
    gzip().buffer().use { gzipSink ->
        fs.read(sourceFile) {
            // The receiver must be explicit: a bare writeAll(this) would resolve to the
            // outer file sink and copy the data uncompressed, bypassing the gzip sink.
            gzipSink.writeAll(this)
        }
    }
}

// Compare file sizes
val originalSize = fs.metadata(sourceFile).size ?: 0
val compressedSize = fs.metadata(compressedFile).size ?: 0
// Guard the ratio so an unknown or zero size does not print NaN or Infinity.
val savedPercent = if (originalSize > 0) (1.0 - compressedSize.toDouble() / originalSize) * 100 else 0.0
println("Original: $originalSize bytes")
println("Compressed: $compressedSize bytes")
println("Saved: ${originalSize - compressedSize} bytes (${savedPercent}%)")

// Decompress and verify
val decompressedContent = fs.read(compressedFile) {
    gzip().buffer().readUtf8()
}
val originalContent = fs.read(sourceFile) { readUtf8() }
println("Content matches: ${originalContent == decompressedContent}")

101

```

102

103

## Deflate Compression

104

105

Deflate is the core compression algorithm used by GZIP and ZIP formats. Okio provides native deflate support using system zlib libraries.

106

107

### Deflater (Native Implementation)

108

109

```kotlin { .api }

110

actual class Deflater {

111

// Constructors

112

constructor() // Default compression

113

constructor(level: Int, nowrap: Boolean) // Custom level and format

114

115

// Properties

116

var flush: Int // Flush mode constants

117

118

// Methods

119

fun getBytesRead(): Long // Total input bytes processed

120

fun end() // End deflation and release resources

121

122

companion object {

123

// Compression levels

124

const val NO_COMPRESSION: Int = 0

125

const val BEST_SPEED: Int = 1

126

const val BEST_COMPRESSION: Int = 9

127

const val DEFAULT_COMPRESSION: Int = -1

128

129

// Flush modes

130

const val NO_FLUSH: Int = 0

131

const val SYNC_FLUSH: Int = 2

132

const val FULL_FLUSH: Int = 3

133

const val FINISH: Int = 4

134

}

135

}

136

```

137

138

### DeflaterSink

139

140

```kotlin { .api }

141

class DeflaterSink(sink: Sink, deflater: Deflater) : Sink {

142

override fun write(source: Buffer, byteCount: Long)

143

override fun flush()

144

override fun timeout(): Timeout

145

override fun close()

146

147

fun finishDeflate() // Finish deflation without closing

148

}

149

```

150

151

### Inflater and InflaterSource

152

153

```kotlin { .api }

154

actual class Inflater {

155

constructor()

156

constructor(nowrap: Boolean)

157

158

fun getBytesRead(): Long

159

fun getBytesWritten(): Long

160

fun end()

161

}

162

163

class InflaterSource(source: Source, inflater: Inflater) : Source {

164

override fun read(sink: Buffer, byteCount: Long): Long

165

override fun timeout(): Timeout

166

override fun close()

167

}

168

```

169

170

### Deflate Usage Examples

171

172

```kotlin

173

// Custom deflate compression with different levels
val testData = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".repeat(100)

/**
 * Deflates [data] (written as UTF-8) at the given compression [level].
 *
 * @param data text to compress
 * @param level one of the `Deflater` level constants (or any level the deflater accepts)
 * @return the compressed bytes
 */
fun compressWithLevel(data: String, level: Int): ByteString {
    val buffer = Buffer()
    // Second argument is `nowrap`. It is passed positionally so the call also compiles
    // where Deflater is java.util.zip.Deflater (Java parameters cannot be named).
    val deflater = Deflater(level, false)

    // DeflaterSink is a plain Sink with no writeUtf8(); buffer() supplies it.
    // Closing the buffered sink flushes it and finishes the deflate stream.
    DeflaterSink(buffer, deflater).buffer().use { sink ->
        sink.writeUtf8(data)
    }

    return buffer.readByteString()
}

186

187

// Run the same payload through each predefined level and report the output size.
val levels = listOf(
    Deflater.NO_COMPRESSION,
    Deflater.BEST_SPEED,
    Deflater.DEFAULT_COMPRESSION,
    Deflater.BEST_COMPRESSION,
)

for (level in levels) {
    val deflated = compressWithLevel(testData, level)
    // Fraction of the original length that remains after compression.
    val fraction = deflated.size.toFloat() / testData.length
    println("Level $level: ${deflated.size} bytes (${fraction * 100}%)")
}

200

201

// Manual deflate/inflate cycle.
// `nowrap` is passed positionally so the calls also compile where these classes are
// the java.util.zip ones (Java parameters cannot be named).
val deflater = Deflater(Deflater.DEFAULT_COMPRESSION, true)
val inflater = Inflater(true)

val originalBuffer = Buffer().writeUtf8("Data to compress")
val compressedBuffer = Buffer()
val decompressedBuffer = Buffer()

// Record the size now: writeAll() below drains originalBuffer.
val originalSize = originalBuffer.size

// Compress. DeflaterSink is a plain Sink, so buffer() is needed for writeAll().
DeflaterSink(compressedBuffer, deflater).buffer().use { sink ->
    sink.writeAll(originalBuffer)
}

// Record the size now: the inflater below drains compressedBuffer.
val compressedSize = compressedBuffer.size

// Decompress
InflaterSource(compressedBuffer, inflater).use { source ->
    decompressedBuffer.writeAll(source)
}

println("Original: $originalSize bytes")
println("Compressed: $compressedSize bytes")
println("Decompressed: ${decompressedBuffer.readUtf8()}")

222

```

223

224

## ZIP File System

225

226

Okio provides read-only access to ZIP files through a FileSystem implementation.

227

228

### ZipFileSystem

229

230

```kotlin { .api }

231

class ZipFileSystem(

232

zipPath: Path,

233

fileSystem: FileSystem = FileSystem.SYSTEM,

234

comment: String? = null

235

) : FileSystem {

236

val comment: String?

237

238

// Implements all FileSystem methods for ZIP file contents

239

override fun canonicalize(path: Path): Path

240

override fun metadataOrNull(path: Path): FileMetadata?

241

override fun list(dir: Path): List<Path>

242

override fun source(file: Path): Source

243

244

// Note: ZIP FileSystem is read-only

245

// write operations fail with an IOException ("zip file systems are read-only")

246

}

247

```

248

249

### ZIP File Usage Examples

250

251

```kotlin

252

val fs = FileSystem.SYSTEM
val zipPath = "/tmp/example.zip".toPath()

// Build the fixture archive with java.util.zip; the Okio side of this example only reads.
java.util.zip.ZipOutputStream(fs.sink(zipPath).buffer().outputStream()).use { zipOut ->
    // Entry name -> content; a null content means "entry with no data" (the empty directory).
    val fixtureEntries = listOf(
        "hello.txt" to "Hello, ZIP world!",
        "subdir/file2.txt" to "File in subdirectory",
        "empty-dir/" to null,
    )
    for ((entryName, text) in fixtureEntries) {
        zipOut.putNextEntry(java.util.zip.ZipEntry(entryName))
        if (text != null) {
            zipOut.write(text.toByteArray())
        }
        zipOut.closeEntry()
    }
}

// Read ZIP file using Okio ZipFileSystem
// NOTE(review): upstream Okio normally exposes this via `fileSystem.openZip(path)`;
// confirm that the constructor used here is public in the documented version.
val zipFs = ZipFileSystem(zipPath)

// Walk every entry and print its kind, path and (when known) size.
println("ZIP file contents:")
for (path in zipFs.listRecursively("/".toPath())) {
    val metadata = zipFs.metadataOrNull(path)
    val type = if (metadata?.isDirectory == true) {
        "[DIR]"
    } else if (metadata?.isRegularFile == true) {
        "[FILE]"
    } else {
        "[OTHER]"
    }
    val knownSize = metadata?.size
    val size = if (knownSize != null) " (${knownSize} bytes)" else ""
    println("$type $path$size")
}

// Read individual entries by their absolute path inside the archive.
val helloContent = zipFs.read("/hello.txt".toPath()) { readUtf8() }
println("Content of hello.txt: '$helloContent'")

val subdirContent = zipFs.read("/subdir/file2.txt".toPath()) { readUtf8() }
println("Content of subdir/file2.txt: '$subdirContent'")

// Archive-level comment, if any
println("ZIP comment: ${zipFs.comment}")

// Remove the fixture archive
fs.delete(zipPath)

304

```

305

306

### ZIP File Metadata

307

308

```kotlin

309

val zipFs = ZipFileSystem(zipPath)

// Print type, size, modification time and any extras for each top-level entry.
for (path in zipFs.list("/".toPath())) {
    val metadata = zipFs.metadata(path)
    val kind = if (metadata.isDirectory) "Directory" else "File"

    println("Path: $path")
    println(" Type: $kind")
    println(" Size: ${metadata.size} bytes")

    // The timestamp is optional; print it only when present.
    val modifiedAt = metadata.lastModifiedAtMillis
    if (modifiedAt != null) {
        val date = java.util.Date(modifiedAt)
        println(" Modified: $date")
    }

    // Extras are keyed by class; ZIP-specific metadata shows up here.
    for ((type, value) in metadata.extras) {
        println(" Extra ${type.simpleName}: $value")
    }
}

329

```

330

331

## Compression Utilities

332

333

### Choosing Compression Methods

334

335

```kotlin

336

/**
 * Compresses [data] once with GZIP and once with raw deflate, then prints both sizes,
 * their percentage of the original, and the difference between the two.
 *
 * All sizes are in bytes of the UTF-8 encoding of [data].
 */
fun compareCompressionMethods(data: String) {
    // Measure the UTF-8 byte count, not the char count: writeUtf8() emits more than
    // one byte for any non-ASCII character, which would otherwise skew the ratios.
    val originalSize = data.utf8Size()

    // GZIP compression.
    // Buffer is both a Source and a Sink, so `gzipBuffer.gzip()` is ambiguous; the
    // constructor selects the sink side. buffer() supplies writeUtf8().
    val gzipBuffer = Buffer()
    GzipSink(gzipBuffer).buffer().use { sink ->
        sink.writeUtf8(data)
    }
    val gzipSize = gzipBuffer.size

    // Raw deflate compression (nowrap = true, passed positionally so the call also
    // compiles where Deflater is java.util.zip.Deflater).
    val deflateBuffer = Buffer()
    val deflater = Deflater(Deflater.DEFAULT_COMPRESSION, true)
    DeflaterSink(deflateBuffer, deflater).buffer().use { sink ->
        sink.writeUtf8(data)
    }
    val deflateSize = deflateBuffer.size

    println("Original: $originalSize bytes")
    println("GZIP: $gzipSize bytes (${gzipSize.toFloat() / originalSize * 100}%)")
    println("Deflate: $deflateSize bytes (${deflateSize.toFloat() / originalSize * 100}%)")
    println("GZIP overhead: ${gzipSize - deflateSize} bytes")
}

360

361

// Exercise the comparison with inputs of decreasing redundancy.

// One character repeated: highly repetitive
compareCompressionMethods("A".repeat(1000))

// A repeated English sentence: natural text
compareCompressionMethods("The quick brown fox jumps over the lazy dog. ".repeat(50))

// Each code point 0..255 exactly once: no repetition for the compressor to exploit
compareCompressionMethods((0..255).joinToString("") { it.toChar().toString() })

365

```

366

367

### Streaming Compression

368

369

```kotlin

370

/**
 * GZIP-compresses the file at [inputPath] into [outputPath] without loading the whole
 * input into memory.
 *
 * Both files are opened on `FileSystem.SYSTEM` and closed before returning, including
 * when an exception is thrown.
 */
fun compressLargeFile(inputPath: Path, outputPath: Path) {
    val fs = FileSystem.SYSTEM

    // GzipSink only offers write(Buffer, Long); wrapping it with buffer() provides
    // writeAll(). The buffer goes outside the gzip sink so writes pass through compression.
    fs.sink(outputPath).gzip().buffer().use { compressedSink ->
        fs.source(inputPath).use { source ->
            // writeAll() copies in segment-sized reads until the source is exhausted,
            // so no manual chunk loop is needed.
            compressedSink.writeAll(source)
        }
    }
}

384

385

/**
 * Decompresses the GZIP file at [inputPath] into [outputPath], printing a percentage
 * as it goes.
 *
 * Progress is the share of the *compressed* input consumed so far, so it ends at 100%.
 * If the input size is unknown or zero, no percentage is printed.
 */
fun decompressWithProgress(inputPath: Path, outputPath: Path) {
    val fs = FileSystem.SYSTEM
    val totalSize = fs.metadata(inputPath).size ?: 0L
    var processedBytes = 0L

    fs.sink(outputPath).buffer().use { output ->
        // Count bytes as the decompressor pulls them from the file. Counting the
        // decompressed output instead would exceed 100% of the compressed size.
        val countingSource = object : okio.ForwardingSource(fs.source(inputPath)) {
            override fun read(sink: Buffer, byteCount: Long): Long {
                val bytesRead = super.read(sink, byteCount)
                if (bytesRead > 0) processedBytes += bytesRead
                return bytesRead
            }
        }

        // Closing the GzipSource also closes countingSource and the file beneath it.
        countingSource.gzip().use { compressedSource ->
            val buffer = Buffer()

            // GzipSource is a plain Source with no exhausted(); read() signals the end with -1.
            while (true) {
                val bytesRead = compressedSource.read(buffer, 8192L)
                if (bytesRead == -1L) break

                output.write(buffer, bytesRead)
                if (totalSize > 0) {
                    val progress = (processedBytes * 100 / totalSize).toInt()
                    print("\rDecompressing: $progress%")
                }
            }
        }
    }
    println("\nDecompression complete!")
}

408

```

409

410

## Error Handling

411

412

Compression operations can encounter various error conditions:

413

414

```kotlin { .api }

415

expect open class IOException : Exception

416

expect class DataFormatException : Exception // Invalid compressed data format

417

expect class ZipException : IOException // ZIP file format issues

418

```

419

420

### Common Error Scenarios

421

422

```kotlin

423

/**
 * Decompresses GZIP [compressedData] and decodes the result as UTF-8.
 *
 * @return the decompressed text, or `null` if the data is corrupt or an I/O error
 *   occurs (a message is printed in either case)
 */
fun safeDecompress(compressedData: ByteString): String? {
    return try {
        // Buffer is both a Source and a Sink, so `.gzip()` on it is ambiguous; the
        // constructor selects the source side. use { } closes the source even when
        // decoding fails part-way through.
        GzipSource(Buffer().write(compressedData)).buffer().use { source ->
            source.readUtf8()
        }
    } catch (e: DataFormatException) {
        println("Corrupted compressed data: ${e.message}")
        null
    } catch (e: IOException) {
        println("I/O error during decompression: ${e.message}")
        null
    }
}

435

436

/**
 * Lists the paths of all regular files inside the ZIP archive at [zipPath].
 *
 * @return the file paths as strings, or an empty list if the archive is invalid or
 *   missing (a message is printed in either case)
 */
fun safeReadZip(zipPath: Path): List<String> {
    try {
        val zipFs = ZipFileSystem(zipPath)
        val regularFiles = mutableListOf<String>()
        for (path in zipFs.listRecursively("/".toPath())) {
            // Directories are skipped; only regular files are reported.
            if (zipFs.metadata(path).isRegularFile) {
                regularFiles += path.toString()
            }
        }
        return regularFiles
    } catch (e: ZipException) {
        println("Invalid ZIP file: ${e.message}")
        return emptyList()
    } catch (e: FileNotFoundException) {
        println("ZIP file not found: ${e.message}")
        return emptyList()
    }
}

452

453

/**
 * Deflates [data] (written as UTF-8) with default settings.
 *
 * @return the compressed bytes, or `null` if compression fails (a message is printed)
 */
fun compressWithCleanup(data: String): ByteString? {
    val deflater = Deflater()
    val buffer = Buffer()

    return try {
        // DeflaterSink is a plain Sink with no writeUtf8(); buffer() supplies it.
        DeflaterSink(buffer, deflater).buffer().use { sink ->
            sink.writeUtf8(data)
        }
        buffer.readByteString()
    } catch (e: Exception) {
        println("Compression failed: ${e.message}")
        null
    } finally {
        // Ensure native resources are released even if the sink was never closed.
        // NOTE(review): closing the sink is expected to end the deflater already; this
        // assumes a second end() is harmless, as it is for java.util.zip.Deflater.
        deflater.end()
    }
}

471

```

472

473

## Performance Considerations

474

475

### Memory Usage

476

477

```kotlin

478

/**
 * Streams everything from [inputSource] into [outputSink] through GZIP, moving at
 * most 16 KiB per iteration. Both ends are closed when the copy finishes or fails.
 */
fun efficientCompression(inputSource: Source, outputSink: Sink) {
    // Buffer the destination, then layer GZIP on top of it.
    val compressor = outputSink.buffer().gzip()

    inputSource.buffer().use { input ->
        compressor.use { gzipOut ->
            // Scratch buffer reused for every chunk.
            val chunk = Buffer()
            while (!input.exhausted()) {
                val count = input.read(chunk, 16384L) // 16KB chunks
                if (count <= 0) continue
                gzipOut.write(chunk, count)
            }
        }
    }
}

497

```

498

499

### Compression Level Trade-offs

500

501

```kotlin

502

/**
 * Deflates [data] at several compression levels and prints, for each level, the output
 * size, its percentage of the input size, and the elapsed wall-clock time.
 */
fun benchmarkCompression(data: ByteString) {
    val levels = listOf(
        Deflater.BEST_SPEED to "Best Speed",
        Deflater.DEFAULT_COMPRESSION to "Default",
        Deflater.BEST_COMPRESSION to "Best Compression",
    )

    levels.forEach { (level, name) ->
        val startTime = System.currentTimeMillis()

        val buffer = Buffer()
        // `nowrap` is passed positionally so the call also compiles where Deflater is
        // java.util.zip.Deflater (Java parameters cannot be named).
        val deflater = Deflater(level, false)
        // DeflaterSink is a plain Sink with no write(ByteString); buffer() supplies it.
        DeflaterSink(buffer, deflater).buffer().use { sink ->
            sink.write(data)
        }

        val endTime = System.currentTimeMillis()
        val compressedSize = buffer.size
        val ratio = compressedSize.toFloat() / data.size

        println("$name: ${compressedSize} bytes (${ratio * 100}%) in ${endTime - startTime}ms")

        // NOTE(review): closing the sink is expected to end the deflater already; kept
        // as an explicit release, assuming a repeated end() is harmless.
        deflater.end()
    }
}

528

```