or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

dictionary-compression.md direct-buffer-streaming.md index.md static-compression.md stream-compression.md utility-functions.md

dictionary-compression.md (docs/)

0

# Dictionary-Based Compression

1

2

Dictionary-based compression improves compression ratios for similar data by using pre-trained dictionaries. Zstd-jni accepts a dictionary either as a raw byte array or, for better performance when the same dictionary is reused, as a pre-compiled dictionary object.

3

4

## Capabilities

5

6

### Dictionary Compression with Byte Arrays

7

8

Compress data using raw dictionary byte arrays.

9

10

```java { .api }

11

/**

12

* Compresses data using a byte array dictionary

13

* @param src source data to compress

14

* @param dict dictionary data

15

* @param level compression level (1-22)

16

* @return compressed data as byte array

17

*/

18

public static byte[] compressUsingDict(byte[] src, byte[] dict, int level);

19

20

/**

21

* Compresses source into destination buffer using dictionary

22

* @param dst destination buffer (must be sized using compressBound)

23

* @param src source data to compress

24

* @param dict dictionary data

25

* @param level compression level (1-22)

26

* @return number of bytes written to dst, or error code

27

*/

28

public static long compress(byte[] dst, byte[] src, byte[] dict, int level);

29

30

/**

31

* ByteBuffer compression with byte array dictionary

32

* @param dstBuf destination buffer (must be direct)

33

* @param srcBuf source buffer (must be direct)

34

* @param dict dictionary data

35

* @param level compression level (1-22)

36

* @return number of bytes written to destination

37

*/

38

public static int compress(ByteBuffer dstBuf, ByteBuffer srcBuf, byte[] dict, int level);

39

40

/**

41

* ByteBuffer compression with dictionary, returns new buffer

42

* @param srcBuf source buffer (must be direct)

43

* @param dict dictionary data

44

* @param level compression level (1-22)

45

* @return new direct ByteBuffer containing compressed data

46

*/

47

public static ByteBuffer compress(ByteBuffer srcBuf, byte[] dict, int level);

48

```

49

50

**Usage Examples:**

51

52

```java

53

import com.github.luben.zstd.Zstd;

54

55

// Create dictionary from sample data

56

String[] samples = {

57

"The quick brown fox jumps over the lazy dog",

58

"The lazy dog sleeps under the quick brown fox",

59

"A quick brown fox and a lazy dog are friends"

60

};

61

byte[][] sampleBytes = Arrays.stream(samples)

62

.map(String::getBytes)

63

.toArray(byte[][]::new);

64

65

// Train dictionary

66

byte[] dictionary = new byte[1024];

67

long dictSize = Zstd.trainFromBuffer(sampleBytes, dictionary);

68

if (Zstd.isError(dictSize)) {

69

throw new RuntimeException("Dictionary training failed");

70

}

71

dictionary = Arrays.copyOf(dictionary, (int) dictSize);

72

73

// Compress with dictionary

74

byte[] data = "The quick brown fox runs fast".getBytes();

75

byte[] compressed = Zstd.compressUsingDict(data, dictionary, 6);

76

```

77

78

### Dictionary Compression with Dictionary Objects

79

80

Use pre-compiled dictionary objects for better performance when reusing dictionaries.

81

82

```java { .api }

83

/**

84

* Compresses data using a pre-compiled compression dictionary

85

* @param src source data to compress

86

* @param dict pre-compiled compression dictionary

87

* @return compressed data as byte array

88

*/

89

public static byte[] compress(byte[] src, ZstdDictCompress dict);

90

91

/**

92

* ByteBuffer compression with pre-compiled dictionary

93

* @param dstBuf destination buffer (must be direct)

94

* @param srcBuf source buffer (must be direct)

95

* @param dict pre-compiled compression dictionary

96

* @return number of bytes written to destination

97

*/

98

public static int compress(ByteBuffer dstBuf, ByteBuffer srcBuf, ZstdDictCompress dict);

99

100

/**

101

* ByteBuffer compression with dictionary, returns new buffer

102

* @param srcBuf source buffer (must be direct)

103

* @param dict pre-compiled compression dictionary

104

* @return new direct ByteBuffer containing compressed data

105

*/

106

public static ByteBuffer compress(ByteBuffer srcBuf, ZstdDictCompress dict);

107

```

108

109

**Usage Examples:**

110

111

```java

112

import com.github.luben.zstd.ZstdDictCompress;

113

114

// Create pre-compiled dictionary for reuse

115

try (ZstdDictCompress dict = new ZstdDictCompress(dictionary, 6)) {

116

// Compress multiple pieces of data efficiently

117

byte[] data1 = "The quick brown fox".getBytes();

118

byte[] data2 = "The lazy dog sleeps".getBytes();

119

120

byte[] compressed1 = Zstd.compress(data1, dict);

121

byte[] compressed2 = Zstd.compress(data2, dict);

122

}

123

```

124

125

### Dictionary Decompression with Byte Arrays

126

127

Decompress data that was compressed with a dictionary, supplying the dictionary as a raw byte array.

128

129

```java { .api }

130

/**

131

* Decompresses data using a byte array dictionary

132

* @param src compressed data

133

* @param dict dictionary data (same as used for compression)

134

* @param originalSize size of original uncompressed data

135

* @return decompressed data as byte array

136

*/

137

public static byte[] decompress(byte[] src, byte[] dict, int originalSize);

138

139

/**

140

* Decompresses source into destination buffer using dictionary

141

* @param dst destination buffer (must be at least as large as the original uncompressed data)

142

* @param src compressed data

143

* @param dict dictionary data

144

* @return number of bytes written to dst, or error code

145

*/

146

public static long decompress(byte[] dst, byte[] src, byte[] dict);

147

148

/**

149

* ByteBuffer decompression with byte array dictionary

150

* @param dstBuf destination buffer (must be direct)

151

* @param srcBuf source buffer (must be direct)

152

* @param dict dictionary data

153

* @return number of bytes written to destination

154

*/

155

public static int decompress(ByteBuffer dstBuf, ByteBuffer srcBuf, byte[] dict);

156

157

/**

158

* ByteBuffer decompression with dictionary, returns new buffer

159

* @param srcBuf source buffer (must be direct)

160

* @param dict dictionary data

161

* @param originalSize size of original uncompressed data

162

* @return new direct ByteBuffer containing decompressed data

163

*/

164

public static ByteBuffer decompress(ByteBuffer srcBuf, byte[] dict, int originalSize);

165

```

166

167

### Dictionary Decompression with Dictionary Objects

168

169

Use pre-compiled decompression dictionary objects for better performance.

170

171

```java { .api }

172

/**

173

* Decompresses data using a pre-compiled decompression dictionary

174

* @param src compressed data

175

* @param dict pre-compiled decompression dictionary

176

* @param originalSize size of original uncompressed data

177

* @return decompressed data as byte array

178

*/

179

public static byte[] decompress(byte[] src, ZstdDictDecompress dict, int originalSize);

180

181

/**

182

* ByteBuffer decompression with pre-compiled dictionary

183

* @param dstBuf destination buffer (must be direct)

184

* @param srcBuf source buffer (must be direct)

185

* @param dict pre-compiled decompression dictionary

186

* @return number of bytes written to destination

187

*/

188

public static int decompress(ByteBuffer dstBuf, ByteBuffer srcBuf, ZstdDictDecompress dict);

189

190

/**

191

* ByteBuffer decompression with dictionary, returns new buffer

192

* @param srcBuf source buffer (must be direct)

193

* @param dict pre-compiled decompression dictionary

194

* @param originalSize size of original uncompressed data

195

* @return new direct ByteBuffer containing decompressed data

196

*/

197

public static ByteBuffer decompress(ByteBuffer srcBuf, ZstdDictDecompress dict, int originalSize);

198

```

199

200

**Usage Examples:**

201

202

```java

203

import com.github.luben.zstd.ZstdDictDecompress;

204

205

// Create pre-compiled decompression dictionary

206

try (ZstdDictDecompress dict = new ZstdDictDecompress(dictionary)) {

207

// Decompress multiple pieces of data efficiently

208

byte[] decompressed1 = Zstd.decompress(compressed1, dict, originalSize1);

209

byte[] decompressed2 = Zstd.decompress(compressed2, dict, originalSize2);

210

}

211

```

212

213

### Dictionary Training

214

215

Create optimized dictionaries from sample data.

216

217

```java { .api }

218

/**

219

* Creates a dictionary from sample data

220

* @param samples array of sample byte arrays representing typical data

221

* @param dictBuffer buffer to store the created dictionary

222

* @return size of dictionary written to buffer, or error code

223

*/

224

public static long trainFromBuffer(byte[][] samples, byte[] dictBuffer);

225

```

226

227

**Usage Examples:**

228

229

```java

230

// Collect sample data representative of what you'll compress

231

List<String> sampleTexts = Arrays.asList(

232

"Sample text with common patterns",

233

"Another sample with similar patterns",

234

"More sample text following the same structure"

235

);

236

237

byte[][] samples = sampleTexts.stream()

238

.map(String::getBytes)

239

.toArray(byte[][]::new);

240

241

// Train dictionary (size should be much smaller than total sample size)

242

byte[] dictBuffer = new byte[4096]; // 4KB dictionary

243

long dictSize = Zstd.trainFromBuffer(samples, dictBuffer);

244

245

if (Zstd.isError(dictSize)) {

246

throw new RuntimeException("Dictionary training failed: " + Zstd.getErrorName(dictSize));

247

}

248

249

// Trim dictionary to actual size

250

byte[] dictionary = Arrays.copyOf(dictBuffer, (int) dictSize);

251

```

252

253

## Dictionary Objects

254

255

```java { .api }

256

/**

257

* Pre-compiled compression dictionary for efficient reuse

258

*/

259

class ZstdDictCompress implements Closeable {

260

/**

261

* Creates compression dictionary from byte array

262

* @param dict dictionary data

263

* @param level compression level to compile into dictionary

264

*/

265

public ZstdDictCompress(byte[] dict, int level);

266

267

/**

268

* Creates compression dictionary from byte array segment

269

* @param dict dictionary data buffer

270

* @param offset offset in buffer

271

* @param length number of bytes to use

272

* @param level compression level to compile into dictionary

273

*/

274

public ZstdDictCompress(byte[] dict, int offset, int length, int level);

275

276

/**

277

* Releases native dictionary resources

278

*/

279

public void close() throws IOException;

280

}

281

282

/**

283

* Pre-compiled decompression dictionary for efficient reuse

284

*/

285

class ZstdDictDecompress implements Closeable {

286

/**

287

* Creates decompression dictionary from byte array

288

* @param dict dictionary data

289

*/

290

public ZstdDictDecompress(byte[] dict);

291

292

/**

293

* Creates decompression dictionary from byte array segment

294

* @param dict dictionary data buffer

295

* @param offset offset in buffer

296

* @param length number of bytes to use

297

*/

298

public ZstdDictDecompress(byte[] dict, int offset, int length);

299

300

/**

301

* Releases native dictionary resources

302

*/

303

public void close() throws IOException;

304

}

305

```

306

307

## Performance Tips

308

309

- **Dictionary reuse**: Use pre-compiled dictionary objects (ZstdDictCompress/ZstdDictDecompress) when compressing multiple data items

310

- **Dictionary size**: Optimal dictionary size is typically 100KB or less for most use cases

311

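- **Training data**: Use representative sample data that closely matches your actual compression workload, and provide plenty of it — the zstd documentation recommends a total sample size of roughly 100 times the target dictionary size, so the tiny sample sets in the examples above are illustrative only and may be too small to train successfully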

312

- **Memory management**: Always close dictionary objects to free native memory

313

- **Compression improvement**: Dictionaries work best on data with repeated patterns or similar structure