0
# Compression and Encoding
1
2
This document covers Okio's data compression capabilities, including GZIP and deflate algorithms with native zlib integration, as well as ZIP file system access.
3
4
## GZIP Compression
5
6
GZIP is a widely used compression format that combines deflate compression with headers and checksums.
7
8
### GzipSink
9
10
```kotlin { .api }
11
class GzipSink(sink: Sink) : Sink {
12
val deflater: Deflater
13
14
override fun write(source: Buffer, byteCount: Long)
15
override fun flush()
16
override fun timeout(): Timeout
17
override fun close()
18
}
19
20
// Extension function for easy creation
21
fun Sink.gzip(): GzipSink
22
```
23
24
### GzipSource
25
26
```kotlin { .api }
27
class GzipSource(source: Source) : Source {
28
override fun read(sink: Buffer, byteCount: Long): Long
29
override fun timeout(): Timeout
30
override fun close()
31
}
32
33
// Extension function for easy creation
34
fun Source.gzip(): GzipSource
35
```
36
37
### GZIP Usage Examples
38
39
```kotlin
40
// Compressing data with GZIP
val originalData = "This is some text that will be compressed using GZIP compression algorithm."
val buffer = Buffer()

// Create GZIP compressed data.
// Buffer is both a Sink and a Source, so a bare buffer.gzip() is ambiguous; the cast
// selects Sink.gzip(). GzipSink is a plain Sink, so wrap it in buffer() to get writeUtf8().
(buffer as Sink).gzip().buffer().use { gzipSink ->
    gzipSink.writeUtf8(originalData)
}

val compressedData = buffer.readByteString()
println("Original size: ${originalData.length}")
println("Compressed size: ${compressedData.size}")
println("Compression ratio: ${compressedData.size.toFloat() / originalData.length}")

// Decompressing GZIP data.
// Buffer.write() returns a Buffer, so the cast is needed again to select Source.gzip().
val decompressedBuffer = Buffer()
(Buffer().write(compressedData) as Source).gzip().use { gzipSource ->
    decompressedBuffer.writeAll(gzipSource)
}

val decompressedText = decompressedBuffer.readUtf8()
println("Decompressed: '$decompressedText'")
println("Match: ${originalData == decompressedText}")
63
```
64
65
### File Compression Example
66
67
```kotlin
68
val fs = FileSystem.SYSTEM
val sourceFile = "/tmp/large-file.txt".toPath()
val compressedFile = "/tmp/large-file.txt.gz".toPath()

// Create a large text file
fs.write(sourceFile) {
    repeat(1000) { i ->
        writeUtf8("Line $i: This is some repeated text to demonstrate compression.\n")
    }
}

// Compress file using GZIP.
// The gzip sink is buffered and named so the copy is routed through it explicitly.
// An unqualified writeAll(this) inside fs.read { } would resolve to the outer file sink
// and write the data uncompressed.
fs.write(compressedFile) {
    gzip().buffer().use { gzipSink ->
        fs.read(sourceFile) {
            gzipSink.writeAll(this)
        }
    }
}

// Compare file sizes
val originalSize = fs.metadata(sourceFile).size ?: 0L
val compressedSize = fs.metadata(compressedFile).size ?: 0L
println("Original: $originalSize bytes")
println("Compressed: $compressedSize bytes")
println("Saved: ${originalSize - compressedSize} bytes (${(1.0 - compressedSize.toDouble() / originalSize) * 100}%)")

// Decompress and verify
val decompressedContent = fs.read(compressedFile) {
    gzip().buffer().readUtf8()
}
val originalContent = fs.read(sourceFile) { readUtf8() }
println("Content matches: ${originalContent == decompressedContent}")
101
```
102
103
## Deflate Compression
104
105
Deflate is the core compression algorithm used by GZIP and ZIP formats. Okio provides native deflate support using system zlib libraries.
106
107
### Deflater (Native Implementation)
108
109
```kotlin { .api }
110
actual class Deflater {
111
// Constructors
112
constructor() // Default compression
113
constructor(level: Int, nowrap: Boolean) // Custom level and format
114
115
// Properties
116
var flush: Int // Flush mode constants
117
118
// Methods
119
fun getBytesRead(): Long // Total input bytes processed
120
fun end() // End deflation and release resources
121
122
companion object {
123
// Compression levels
124
const val NO_COMPRESSION: Int = 0
125
const val BEST_SPEED: Int = 1
126
const val BEST_COMPRESSION: Int = 9
127
const val DEFAULT_COMPRESSION: Int = -1
128
129
// Flush modes
130
const val NO_FLUSH: Int = 0
131
const val SYNC_FLUSH: Int = 2
132
const val FULL_FLUSH: Int = 3
133
const val FINISH: Int = 4
134
}
135
}
136
```
137
138
### DeflaterSink
139
140
```kotlin { .api }
141
class DeflaterSink(sink: Sink, deflater: Deflater) : Sink {
142
override fun write(source: Buffer, byteCount: Long)
143
override fun flush()
144
override fun timeout(): Timeout
145
override fun close()
146
147
fun finishDeflate() // Finish deflation without closing
148
}
149
```
150
151
### Inflater and InflaterSource
152
153
```kotlin { .api }
154
actual class Inflater {
155
constructor()
156
constructor(nowrap: Boolean)
157
158
fun getBytesRead(): Long
159
fun getBytesWritten(): Long
160
fun end()
161
}
162
163
class InflaterSource(source: Source, inflater: Inflater) : Source {
164
override fun read(sink: Buffer, byteCount: Long): Long
165
override fun timeout(): Timeout
166
override fun close()
167
}
168
```
169
170
### Deflate Usage Examples
171
172
```kotlin
173
// Custom deflate compression with different levels
174
val testData = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".repeat(100)
175
176
/**
 * Deflates [data] (encoded as UTF-8) at the given compression [level] and returns
 * the compressed bytes.
 *
 * @param data text to compress
 * @param level one of the `Deflater` compression level constants
 * @return the zlib-wrapped deflate stream
 */
fun compressWithLevel(data: String, level: Int): ByteString {
    val buffer = Buffer()
    val deflater = Deflater(level, nowrap = false)

    // DeflaterSink is a plain Sink; buffer() adds the BufferedSink API (writeUtf8).
    // Closing the buffered sink finishes the deflate stream into `buffer`.
    DeflaterSink(buffer, deflater).buffer().use { sink ->
        sink.writeUtf8(data)
    }

    return buffer.readByteString()
}
186
187
// Compare compression levels
188
val levels = listOf(
    Deflater.NO_COMPRESSION,
    Deflater.BEST_SPEED,
    Deflater.DEFAULT_COMPRESSION,
    Deflater.BEST_COMPRESSION,
)

// Report the compressed size and ratio achieved at each level
for (level in levels) {
    val compressed = compressWithLevel(testData, level)
    val ratio = compressed.size.toFloat() / testData.length
    println("Level $level: ${compressed.size} bytes (${ratio * 100}%)")
}
200
201
// Manual deflate/inflate cycle
val deflater = Deflater(Deflater.DEFAULT_COMPRESSION, nowrap = true)
val inflater = Inflater(nowrap = true)

val originalBuffer = Buffer().writeUtf8("Data to compress")
val compressedBuffer = Buffer()
val decompressedBuffer = Buffer()

// Reading from a Buffer consumes it, so record each size before the next stage drains it.
val originalSize = originalBuffer.size

// Compress (DeflaterSink is a plain Sink; buffer() provides writeAll)
DeflaterSink(compressedBuffer, deflater).buffer().use { sink ->
    sink.writeAll(originalBuffer)
}
val compressedSize = compressedBuffer.size

// Decompress
InflaterSource(compressedBuffer, inflater).use { source ->
    decompressedBuffer.writeAll(source)
}

println("Original: $originalSize bytes")
println("Compressed: $compressedSize bytes")
println("Decompressed: ${decompressedBuffer.readUtf8()}")
222
```
223
224
## ZIP File System
225
226
Okio provides read-only access to ZIP files through a FileSystem implementation.
227
228
### ZipFileSystem
229
230
```kotlin { .api }
231
class ZipFileSystem(
232
zipPath: Path,
233
fileSystem: FileSystem = FileSystem.SYSTEM,
234
comment: String? = null
235
) : FileSystem {
236
val comment: String?
237
238
// Implements all FileSystem methods for ZIP file contents
239
override fun canonicalize(path: Path): Path
240
override fun metadataOrNull(path: Path): FileMetadata?
241
override fun list(dir: Path): List<Path>
242
override fun source(file: Path): Source
243
244
// Note: ZIP FileSystem is read-only
245
// write operations throw UnsupportedOperationException
246
}
247
```
248
249
### ZIP File Usage Examples
250
251
```kotlin
252
val fs = FileSystem.SYSTEM
253
val zipPath = "/tmp/example.zip".toPath()
254
255
// Create a ZIP file (using standard Java ZIP APIs for writing)
256
java.util.zip.ZipOutputStream(fs.sink(zipPath).buffer().outputStream()).use { zipOut ->
257
// Add first file
258
zipOut.putNextEntry(java.util.zip.ZipEntry("hello.txt"))
259
zipOut.write("Hello, ZIP world!".toByteArray())
260
zipOut.closeEntry()
261
262
// Add second file in subdirectory
263
zipOut.putNextEntry(java.util.zip.ZipEntry("subdir/file2.txt"))
264
zipOut.write("File in subdirectory".toByteArray())
265
zipOut.closeEntry()
266
267
// Add empty directory
268
zipOut.putNextEntry(java.util.zip.ZipEntry("empty-dir/"))
269
zipOut.closeEntry()
270
}
271
272
// Read ZIP file using Okio ZipFileSystem
273
val zipFs = ZipFileSystem(zipPath)
274
275
// List contents of ZIP file
276
println("ZIP file contents:")
277
zipFs.listRecursively("/".toPath()).forEach { path ->
278
val metadata = zipFs.metadataOrNull(path)
279
val type = when {
280
metadata?.isDirectory == true -> "[DIR]"
281
metadata?.isRegularFile == true -> "[FILE]"
282
else -> "[OTHER]"
283
}
284
val size = metadata?.size?.let { " (${it} bytes)" } ?: ""
285
println("$type $path$size")
286
}
287
288
// Read files from ZIP
289
val helloContent = zipFs.read("/hello.txt".toPath()) {
290
readUtf8()
291
}
292
println("Content of hello.txt: '$helloContent'")
293
294
val subdirContent = zipFs.read("/subdir/file2.txt".toPath()) {
295
readUtf8()
296
}
297
println("Content of subdir/file2.txt: '$subdirContent'")
298
299
// Check ZIP comment
300
println("ZIP comment: ${zipFs.comment}")
301
302
// Cleanup
303
fs.delete(zipPath)
304
```
305
306
### ZIP File Metadata
307
308
```kotlin
309
val zipFs = ZipFileSystem(zipPath)
310
311
// Get detailed metadata for ZIP entries
312
zipFs.list("/".toPath()).forEach { path ->
313
val metadata = zipFs.metadata(path)
314
315
println("Path: $path")
316
println(" Type: ${if (metadata.isDirectory) "Directory" else "File"}")
317
println(" Size: ${metadata.size} bytes")
318
319
metadata.lastModifiedAtMillis?.let { timestamp ->
320
val date = java.util.Date(timestamp)
321
println(" Modified: $date")
322
}
323
324
// ZIP-specific metadata in extras
325
metadata.extras.forEach { (type, value) ->
326
println(" Extra ${type.simpleName}: $value")
327
}
328
}
329
```
330
331
## Compression Utilities
332
333
### Choosing Compression Methods
334
335
```kotlin
336
// Utility function to compare compression methods
337
/**
 * Compresses [data] with GZIP and with raw deflate, then prints both sizes,
 * their ratio to the original, and the extra bytes GZIP adds over raw deflate.
 *
 * @param data text to compress; sizes are measured on its UTF-8 encoding
 */
fun compareCompressionMethods(data: String) {
    // Measure encoded bytes, not characters: non-ASCII characters take more than one byte.
    val originalSize = data.utf8Size()

    // GZIP compression.
    // Buffer is both Sink and Source, so the cast selects Sink.gzip(); buffer() adds writeUtf8().
    val gzipBuffer = Buffer()
    (gzipBuffer as Sink).gzip().buffer().use { sink ->
        sink.writeUtf8(data)
    }
    val gzipSize = gzipBuffer.size

    // Raw deflate compression (nowrap = true omits the zlib wrapper)
    val deflateBuffer = Buffer()
    val deflater = Deflater(Deflater.DEFAULT_COMPRESSION, nowrap = true)
    DeflaterSink(deflateBuffer, deflater).buffer().use { sink ->
        sink.writeUtf8(data)
    }
    val deflateSize = deflateBuffer.size

    println("Original: $originalSize bytes")
    println("GZIP: $gzipSize bytes (${gzipSize.toFloat() / originalSize * 100}%)")
    println("Deflate: $deflateSize bytes (${deflateSize.toFloat() / originalSize * 100}%)")
    println("GZIP overhead: ${gzipSize - deflateSize} bytes")
}
360
361
// Test with different types of data
362
compareCompressionMethods("A".repeat(1000)) // Highly repetitive
363
compareCompressionMethods("The quick brown fox jumps over the lazy dog. ".repeat(50)) // Natural text
364
compareCompressionMethods((0..255).map { it.toChar() }.joinToString("")) // Random-like data
365
```
366
367
### Streaming Compression
368
369
```kotlin
370
// Compress large amounts of data without loading everything into memory
371
/**
 * GZIP-compresses the file at [inputPath] into [outputPath], streaming the data
 * rather than reading the whole file into memory.
 *
 * @param inputPath file to compress
 * @param outputPath destination for the `.gz` output
 */
fun compressLargeFile(inputPath: Path, outputPath: Path) {
    val fs = FileSystem.SYSTEM

    // Open the input first so a missing input does not leave a stray output file behind.
    fs.source(inputPath).use { source ->
        // GzipSink is a plain Sink; buffer() on top of it provides writeAll(),
        // which copies from the source until it is exhausted.
        fs.sink(outputPath).gzip().buffer().use { compressedSink ->
            compressedSink.writeAll(source)
        }
    }
}
384
385
// Decompress with progress monitoring
386
/**
 * Decompresses the GZIP file at [inputPath] into [outputPath], printing a
 * percentage based on how much of the compressed input has been consumed.
 *
 * @param inputPath GZIP file to read
 * @param outputPath destination for the decompressed data
 */
fun decompressWithProgress(inputPath: Path, outputPath: Path) {
    val fs = FileSystem.SYSTEM
    val totalSize = fs.metadata(inputPath).size ?: 0L
    var compressedBytesRead = 0L

    fs.sink(outputPath).buffer().use { output ->
        // Count bytes as they are pulled from the compressed file. Progress must be
        // measured against compressed input, because totalSize is the compressed size.
        val countingSource = object : ForwardingSource(fs.source(inputPath)) {
            override fun read(sink: Buffer, byteCount: Long): Long {
                val bytesRead = super.read(sink, byteCount)
                if (bytesRead > 0) compressedBytesRead += bytesRead
                return bytesRead
            }
        }

        countingSource.gzip().use { compressedSource ->
            val buffer = Buffer()

            // GzipSource is a plain Source with no exhausted(); read() returns -1 at end of stream.
            while (true) {
                val bytesRead = compressedSource.read(buffer, 8192L)
                if (bytesRead == -1L) break

                output.write(buffer, bytesRead)
                if (totalSize > 0) {
                    val progress = (compressedBytesRead * 100 / totalSize).toInt()
                    print("\rDecompressing: $progress%")
                }
            }
        }
    }
    println("\nDecompression complete!")
}
408
```
409
410
## Error Handling
411
412
Compression operations can encounter various error conditions:
413
414
```kotlin { .api }
415
expect open class IOException : Exception
416
expect class DataFormatException : Exception // Invalid compressed data format
417
expect class ZipException : IOException // ZIP file format issues
418
```
419
420
### Common Error Scenarios
421
422
```kotlin
423
// Handle corrupted compressed data
424
/**
 * Decompresses GZIP [compressedData] and decodes it as UTF-8.
 *
 * @param compressedData a complete GZIP stream
 * @return the decompressed text, or `null` if the data could not be decompressed
 */
fun safeDecompress(compressedData: ByteString): String? {
    return try {
        // Buffer is both Sink and Source, so the cast selects Source.gzip().
        // use { } closes the gzip source even when decoding fails.
        (Buffer().write(compressedData) as Source).gzip().buffer().use { it.readUtf8() }
    } catch (e: DataFormatException) {
        println("Corrupted compressed data: ${e.message}")
        null
    } catch (e: IOException) {
        println("I/O error during decompression: ${e.message}")
        null
    }
}
435
436
// Handle ZIP file errors
437
/**
 * Lists the regular files contained in the ZIP archive at [zipPath].
 *
 * @param zipPath location of the ZIP archive
 * @return the paths of all regular-file entries as strings, or an empty list
 *   if the archive is missing or cannot be read
 */
fun safeReadZip(zipPath: Path): List<String> {
    return try {
        val zipFs = ZipFileSystem(zipPath)
        zipFs.listRecursively("/".toPath())
            .filter { path -> zipFs.metadata(path).isRegularFile }
            .map { path -> path.toString() }
            .toList()
    } catch (e: ZipException) {
        println("Invalid ZIP file: ${e.message}")
        emptyList()
    } catch (e: FileNotFoundException) {
        println("ZIP file not found: ${e.message}")
        emptyList()
    } catch (e: IOException) {
        // Other I/O failures (for example a truncated or unsupported archive reported
        // as a plain IOException) should not escape a "safe" reader either.
        println("Failed to read ZIP file: ${e.message}")
        emptyList()
    }
}
452
453
// Resource cleanup with error handling
454
/**
 * Deflates [data] (encoded as UTF-8) with the default settings, always releasing
 * the deflater afterwards.
 *
 * @param data text to compress
 * @return the compressed bytes, or `null` if compression failed
 */
fun compressWithCleanup(data: String): ByteString? {
    val deflater = Deflater()
    val buffer = Buffer()

    return try {
        // DeflaterSink is a plain Sink; buffer() adds the BufferedSink API (writeUtf8).
        DeflaterSink(buffer, deflater).buffer().use { sink ->
            sink.writeUtf8(data)
        }
        buffer.readByteString()
    } catch (e: Exception) {
        println("Compression failed: ${e.message}")
        null
    } finally {
        // Ensure native resources are released
        deflater.end()
    }
}
471
```
472
473
## Performance Considerations
474
475
### Memory Usage
476
477
```kotlin
478
// Efficient streaming for large data
479
/**
 * Streams everything from [inputSource] through a GZIP compressor into [outputSink],
 * moving at most 16 KiB per iteration. Both streams are closed when the copy ends.
 */
fun efficientCompression(inputSource: Source, outputSink: Sink) {
    // Buffer the destination before layering GZIP on top of it
    val compressor = outputSink.buffer().gzip()

    inputSource.buffer().use { input ->
        compressor.use { gzip ->
            val chunk = Buffer()
            while (true) {
                if (input.exhausted()) break

                val count = input.read(chunk, 16384L) // 16KB chunks
                if (count <= 0) continue

                gzip.write(chunk, count)
            }
        }
    }
}
497
```
498
499
### Compression Level Trade-offs
500
501
```kotlin
502
// Benchmark different compression levels
503
/**
 * Compresses [data] at several deflate levels and prints, for each level, the
 * compressed size, the ratio to the input size, and the elapsed time.
 *
 * @param data bytes to compress
 */
fun benchmarkCompression(data: ByteString) {
    val levels = listOf(
        Deflater.BEST_SPEED to "Best Speed",
        Deflater.DEFAULT_COMPRESSION to "Default",
        Deflater.BEST_COMPRESSION to "Best Compression",
    )

    levels.forEach { (level, name) ->
        val buffer = Buffer()
        val deflater = Deflater(level, nowrap = false)

        // measureTimeMillis comes from the Kotlin standard library, so the timing does not
        // rely on java.lang.System.
        val elapsedMillis = kotlin.system.measureTimeMillis {
            // DeflaterSink is a plain Sink; buffer() adds write(ByteString).
            DeflaterSink(buffer, deflater).buffer().use { sink ->
                sink.write(data)
            }
        }

        val compressedSize = buffer.size
        val ratio = compressedSize.toFloat() / data.size

        println("$name: ${compressedSize} bytes (${ratio * 100}%) in ${elapsedMillis}ms")

        deflater.end()
    }
}
528
```