or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

array-creation.md · codecs.md · configuration.md · core-classes.md · data-access.md · data-io.md · group-management.md · index.md · storage-backends.md

codecs.mddocs/

0

# Codecs

1

2

Codec classes for data compression, transformation, and encoding. These enable efficient storage through various compression algorithms and data transformations that can be applied to zarr arrays.

3

4

## Capabilities

5

6

### Compression Codecs

7

8

```python { .api }

9

class BloscCodec:

10

"""Blosc compression codec with multiple algorithms."""

11

12

def __init__(

13

self,

14

cname: BloscCname = 'zstd',

15

clevel: int = 5,

16

shuffle: BloscShuffle = BloscShuffle.SHUFFLE,

17

typesize: int | None = None,

18

blocksize: int = 0,

19

**kwargs

20

): ...

21

22

@property

23

def cname(self) -> BloscCname: ...

24

@property

25

def clevel(self) -> int: ...

26

@property

27

def shuffle(self) -> BloscShuffle: ...

28

```

29

30

```python { .api }

31

class BloscCname(Enum):

32

"""Blosc compression algorithms."""

33

LZ4 = "lz4"

34

LZ4HC = "lz4hc"

35

ZLIB = "zlib"

36

ZSTD = "zstd"

37

BLOSCLZ = "blosclz"

38

SNAPPY = "snappy"

39

```

40

41

```python { .api }

42

class BloscShuffle(Enum):

43

"""Blosc shuffle options."""

44

NOSHUFFLE = 0

45

SHUFFLE = 1

46

BITSHUFFLE = 2

47

```

48

49

```python { .api }

50

class GzipCodec:

51

"""Gzip compression codec."""

52

53

def __init__(self, level: int = 6, **kwargs): ...

54

55

@property

56

def level(self) -> int: ...

57

```

58

59

```python { .api }

60

class ZstdCodec:

61

"""Zstandard compression codec."""

62

63

def __init__(

64

self,

65

level: int = 3,

66

checksum: bool = False,

67

**kwargs

68

): ...

69

70

@property

71

def level(self) -> int: ...

72

@property

73

def checksum(self) -> bool: ...

74

```

75

76

### Array Processing Codecs

77

78

```python { .api }

79

class BytesCodec:

80

"""Array to bytes conversion codec."""

81

82

def __init__(self, endian: Endian = Endian.LITTLE, **kwargs): ...

83

84

@property

85

def endian(self) -> Endian: ...

86

```

87

88

```python { .api }

89

class Endian(Enum):

90

"""Byte order options."""

91

BIG = "big"

92

LITTLE = "little"

93

NATIVE = "native"

94

```

95

96

```python { .api }

97

class TransposeCodec:

98

"""Array transposition codec for dimension reordering."""

99

100

def __init__(self, order: tuple[int, ...], **kwargs): ...

101

102

@property

103

def order(self) -> tuple[int, ...]: ...

104

```

105

106

### Advanced Codecs

107

108

```python { .api }

109

class ShardingCodec:

110

"""Sharding codec that packs multiple inner chunks into a single stored shard."""

111

112

def __init__(

113

self,

114

chunk_shape: tuple[int, ...],

115

codecs: list[Codec],

116

index_codecs: list[Codec] | None = None,

117

index_location: ShardingCodecIndexLocation = ShardingCodecIndexLocation.END,

118

**kwargs

119

): ...

120

121

@property

122

def chunk_shape(self) -> tuple[int, ...]: ...

123

@property

124

def codecs(self) -> list[Codec]: ...

125

@property

126

def index_codecs(self) -> list[Codec]: ...

127

@property

128

def index_location(self) -> ShardingCodecIndexLocation: ...

129

```

130

131

```python { .api }

132

class ShardingCodecIndexLocation(Enum):

133

"""Shard index storage location."""

134

START = "start"

135

END = "end"

136

```

137

138

### String and Variable-Length Data Codecs

139

140

```python { .api }

141

class VLenUTF8Codec:

142

"""Variable-length UTF-8 string codec."""

143

144

def __init__(self, **kwargs): ...

145

```

146

147

```python { .api }

148

class VLenBytesCodec:

149

"""Variable-length bytes codec."""

150

151

def __init__(self, **kwargs): ...

152

```

153

154

### Checksum and Integrity Codecs

155

156

```python { .api }

157

class Crc32cCodec:

158

"""CRC32C checksum codec for data integrity."""

159

160

def __init__(self, **kwargs): ...

161

```

162

163

## Type Definitions

164

165

```python { .api }

166

Codec = Union[

167

BloscCodec, GzipCodec, ZstdCodec, BytesCodec,

168

TransposeCodec, ShardingCodec, VLenUTF8Codec,

169

VLenBytesCodec, Crc32cCodec

170

]

171

172

CompressorLike = Union[str, dict, Codec, None]

173

FiltersLike = Union[list[Codec], None]

174

```

175

176

## Usage Examples

177

178

### Basic Compression

179

180

```python

181

import zarr

182

from zarr.codecs import BloscCodec, GzipCodec, ZstdCodec

183

184

# Create array with Blosc compression

185

blosc_codec = BloscCodec(cname='zstd', clevel=3, shuffle='shuffle')

186

arr = zarr.create(

187

shape=(1000, 1000),

188

chunks=(100, 100),

189

compressor=blosc_codec

190

)

191

192

# Use Gzip compression

193

gzip_codec = GzipCodec(level=6)

194

arr = zarr.create(shape=(500, 500), compressor=gzip_codec)

195

196

# Use Zstandard compression

197

zstd_codec = ZstdCodec(level=5, checksum=True)

198

arr = zarr.create(shape=(800, 800), compressor=zstd_codec)

199

```

200

201

### Codec Pipelines

202

203

```python

204

from zarr.codecs import BloscCodec, BytesCodec, TransposeCodec

205

206

# Create multi-stage codec pipeline

207

codecs = [

208

TransposeCodec(order=(1, 0)), # Transpose dimensions

209

BytesCodec(endian='little'), # Convert to bytes (array-to-bytes codec must precede compressors)

210

BloscCodec(cname='lz4', clevel=1) # Compress the serialized bytes (bytes-to-bytes codec)

211

]

212

213

arr = zarr.create(

214

shape=(1000, 2000),

215

chunks=(100, 200),

216

codecs=codecs

217

)

218

```

219

220

### Sharding for Small Chunks

221

222

```python

223

from zarr.codecs import ShardingCodec, BloscCodec, BytesCodec

224

225

# Use sharding to group small chunks efficiently

226

shard_codec = ShardingCodec(

227

chunk_shape=(10, 10), # Inner chunk shape within each shard

228

codecs=[

229

BytesCodec(), # Array-to-bytes codec comes first

230

BloscCodec(cname='zstd', clevel=3) # Then compress the bytes

231

],

232

index_location='end'

233

)

234

235

arr = zarr.create(

236

shape=(10000, 10000),

237

chunks=(100, 100), # Shard shape (each shard holds a 10 x 10 grid of inner chunks)

238

codecs=[shard_codec]

239

)

240

```

241

242

### String Data Handling

243

244

```python

245

from zarr.codecs import VLenUTF8Codec

246

247

# Array of variable-length strings

248

string_codec = VLenUTF8Codec()

249

str_arr = zarr.create(

250

shape=(1000,),

251

dtype='<U', # Variable-length unicode

252

codecs=[string_codec]

253

)

254

255

str_arr[0] = "Hello, world!"

256

str_arr[1] = "Variable length strings work great with zarr"

257

```

258

259

### Data Integrity with Checksums

260

261

```python

262

from zarr.codecs import Crc32cCodec, BloscCodec, BytesCodec

263

264

# Add checksum for data integrity

265

codecs = [

266

BytesCodec(), # Array-to-bytes codec comes first

267

BloscCodec(cname='zstd', clevel=3), # Then compress the bytes

268

Crc32cCodec() # Add CRC32C checksum

269

]

270

271

arr = zarr.create(

272

shape=(5000, 5000),

273

chunks=(500, 500),

274

codecs=codecs

275

)

276

```

277

278

### Performance Optimization Examples

279

280

```python

281

# Fast compression for temporary data

282

fast_codecs = [

283

BytesCodec(), # Array-to-bytes codec comes first

284

BloscCodec(cname='lz4', clevel=1, shuffle='noshuffle')

285

]

286

287

# Maximum compression for archival

288

archive_codecs = [

289

BytesCodec(), # Array-to-bytes codec comes first

290

BloscCodec(cname='zstd', clevel=9, shuffle='bitshuffle')

291

]

292

293

# Optimize for numerical data patterns

294

numeric_codecs = [

295

TransposeCodec(order=(2, 1, 0)), # Reorder for better compression

296

BytesCodec(), # Array-to-bytes codec follows the array-to-array transpose

297

BloscCodec(cname='zstd', clevel=3, shuffle='shuffle')

298

]

299

```

300

301

### Custom Codec Configuration

302

303

```python

304

from zarr.codecs import BloscCodec, BloscCname, BloscShuffle

305

306

# Fine-tune Blosc parameters

307

codec = BloscCodec(

308

cname=BloscCname.ZSTD,

309

clevel=7, # Higher compression

310

shuffle=BloscShuffle.BITSHUFFLE, # Better for numerical data

311

blocksize=2**16 # 64KB blocks

312

)

313

314

# Configure for specific data types

315

float_codec = BloscCodec(

316

cname='zstd',

317

shuffle='shuffle', # Good for floating point

318

typesize=8 # 8-byte floats

319

)

320

321

int_codec = BloscCodec(

322

cname='lz4hc',

323

shuffle='bitshuffle', # Excellent for integers

324

typesize=4 # 4-byte integers

325

)

326

```