# Zarr Integration

Zarr store implementations for cloud-native access to TIFF files and file sequences, enabling scalable processing of large scientific datasets without loading entire files into memory. These stores provide compatibility with the Zarr ecosystem for distributed computing and analysis workflows.

**Note**: These classes are available in the `tifffile.zarr` module, not the main `tifffile` module.

```python
from tifffile.zarr import ZarrTiffStore, ZarrFileSequenceStore, ZarrStore
```

## Capabilities

### ZarrTiffStore Class

Zarr store interface for individual TIFF files, providing chunked access to image data with lazy loading and memory-efficient processing.

```python { .api }
class ZarrTiffStore:
    def __init__(
        self,
        tifffile,
        *,
        key=None,
        series=None,
        level=None,
        chunkmode=None,
        fillvalue=None,
        zattrs=None,
        **kwargs
    ):
        """
        Initialize Zarr store for TIFF file.

        Parameters:
        - tifffile: TiffFile instance or file path
        - key: int, slice, or sequence of page indices
        - series: int, series index for multi-series files
        - level: int, pyramid level for multi-resolution files
        - chunkmode: CHUNKMODE enum, chunking strategy
        - fillvalue: numeric, fill value for missing data
        - zattrs: dict, additional Zarr attributes
        """

    def __getitem__(self, key):
        """Get data chunk by key."""

    def __setitem__(self, key, value):
        """Set data chunk (read-only store, raises NotImplementedError)."""

    def __contains__(self, key):
        """Check if key exists in store."""

    def __iter__(self):
        """Iterate over store keys."""

    def keys(self):
        """Return all keys in store."""

    @property
    def shape(self):
        """tuple: Shape of the array."""

    @property
    def dtype(self):
        """np.dtype: Data type of array elements."""

    @property
    def chunks(self):
        """tuple: Chunk dimensions."""
```

#### Usage Examples

```python
# Create Zarr store from TIFF file
with tifffile.TiffFile('large.tif') as tif:
    store = tif.aszarr()
    print(f"Shape: {store.shape}")
    print(f"Chunks: {store.chunks}")

    # Access data chunks
    chunk = store[0:1000, 0:1000]

# Direct creation from file path
store = tifffile.imread('huge.tif', aszarr=True)

# Use with Zarr array
import zarr
z_array = zarr.open(store, mode='r')
print(f"Zarr array: {z_array}")

# Process in chunks with Dask
import dask.array as da
dask_array = da.from_zarr(store)
result = dask_array.mean(axis=0).compute()
```

### ZarrFileSequenceStore Class

Zarr store for sequences of TIFF files, treating multiple files as a single logical array with an additional time or sequence dimension.

```python { .api }
class ZarrFileSequenceStore:
    def __init__(
        self,
        files,
        *,
        imread=None,
        pattern=None,
        axesorder=None,
        categories=None,
        chunkmode=None,
        fillvalue=None,
        zattrs=None,
        **kwargs
    ):
        """
        Initialize Zarr store for file sequence.

        Parameters:
        - files: sequence of file paths or glob pattern
        - imread: callable, custom function for reading files
        - pattern: str, glob pattern for file matching
        - axesorder: sequence of ints, axis reordering
        - categories: dict, categorical data mappings
        - chunkmode: CHUNKMODE enum, chunking strategy
        - fillvalue: numeric, fill value for missing data
        - zattrs: dict, additional Zarr attributes
        """

    def __getitem__(self, key):
        """Get data chunk by key."""

    def __setitem__(self, key, value):
        """Set data chunk (read-only store, raises NotImplementedError)."""

    @property
    def shape(self):
        """tuple: Shape including sequence dimension."""

    @property
    def dtype(self):
        """np.dtype: Data type of array elements."""

    @property
    def chunks(self):
        """tuple: Chunk dimensions."""

    @property
    def files(self):
        """list: File paths in sequence."""
```

#### Usage Examples

```python
# Create store from file list
files = ['img001.tif', 'img002.tif', 'img003.tif']
store = tifffile.imread(files, aszarr=True)
print(f"Sequence shape: {store.shape}")  # (3, height, width)

# Create store from glob pattern
store = tifffile.imread('timeseries_*.tif', aszarr=True)

# Access specific timepoints
timepoint_0 = store[0]  # First file
timepoint_slice = store[10:20]  # Files 10-19

# Use with Zarr for time series analysis
import zarr
z_array = zarr.open(store, mode='r')
time_series = z_array[:, 100, 100]  # Pixel time series
```

### ZarrStore Base Class

Base class for Zarr store implementations, providing common functionality.

```python { .api }
class ZarrStore:
    def __init__(self, **kwargs):
        """Initialize base Zarr store."""

    def close(self):
        """Close the store and release resources."""

    def __enter__(self):
        """Context manager entry."""

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""

    @property
    def path(self):
        """str: Store path or identifier."""

    @property
    def mode(self):
        """str: Store access mode."""
```

## Advanced Usage Patterns

### Large Dataset Processing

```python
# Process extremely large TIFF files
def process_large_tiff(filename, chunk_size=1024):
    with tifffile.TiffFile(filename) as tif:
        store = tif.aszarr()

        # Process in chunks to avoid memory issues
        for y in range(0, store.shape[0], chunk_size):
            for x in range(0, store.shape[1], chunk_size):
                y_end = min(y + chunk_size, store.shape[0])
                x_end = min(x + chunk_size, store.shape[1])

                chunk = store[y:y_end, x:x_end]
                # Process chunk...
                processed = chunk * 2  # Example processing

                yield (y, x), processed

# Usage
for (y, x), result in process_large_tiff('huge.tif'):
    print(f"Processed chunk at ({y}, {x})")
```

### Multi-scale Pyramid Access

```python
# Access different pyramid levels
with tifffile.TiffFile('pyramid.tif') as tif:
    # Full resolution
    full_res = tif.aszarr(level=0)

    # Lower resolution levels
    level_1 = tif.aszarr(level=1)
    level_2 = tif.aszarr(level=2)

    print(f"Level 0: {full_res.shape}")
    print(f"Level 1: {level_1.shape}")
    print(f"Level 2: {level_2.shape}")
```

### Time Series Analysis

```python
import zarr
import numpy as np

# Analyze time series data
files = [f'timeseries_{i:03d}.tif' for i in range(100)]
store = tifffile.imread(files, aszarr=True)
z_array = zarr.open(store, mode='r')

# Calculate statistics over time
mean_projection = np.mean(z_array, axis=0)
max_projection = np.max(z_array, axis=0)
std_projection = np.std(z_array, axis=0)

# Pixel-wise time series analysis
roi_time_series = z_array[:, 100:200, 100:200]
roi_mean = np.mean(roi_time_series, axis=(1, 2))  # Mean over ROI per timepoint
```

### Distributed Computing Integration

```python
import dask.array as da
from dask.distributed import Client

# Set up Dask client for distributed processing
client = Client('scheduler-address:8786')

# Create Dask array from Zarr store
store = tifffile.imread('large_dataset.tif', aszarr=True)
dask_array = da.from_zarr(store, chunks=(1000, 1000))

# Distributed processing
result = dask_array.map_blocks(
    lambda x: x * 2 + 1,  # Example processing function
    dtype=dask_array.dtype
)

# Compute result
output = result.compute()
client.close()
```

### Cloud Storage Integration

```python
import fsspec
import zarr

# Access TIFF files from cloud storage
def cloud_tiff_store(url, storage_options=None):
    """Create Zarr store for cloud-hosted TIFF file."""

    # Open file from cloud storage
    fs = fsspec.filesystem('s3', **(storage_options or {}))

    with fs.open(url, 'rb') as f:
        with tifffile.TiffFile(f) as tif:
            # Create local Zarr store
            store = zarr.MemoryStore()
            zarr_array = zarr.open(store, mode='w',
                                   shape=tif.pages[0].shape,
                                   dtype=tif.pages[0].dtype,
                                   chunks=(1024, 1024))

            # Copy data in chunks
            tiff_store = tif.aszarr()
            zarr_array[:] = tiff_store[:]

    return store

# Usage
storage_opts = {'key': 'access_key', 'secret': 'secret_key'}
store = cloud_tiff_store('s3://bucket/large_image.tif', storage_opts)
```

### Custom Chunking Strategies

```python
# Optimize chunking for specific access patterns
def create_optimized_store(filename, access_pattern='sequential'):
    with tifffile.TiffFile(filename) as tif:
        if access_pattern == 'sequential':
            # Optimize for row-wise access
            chunkmode = tifffile.CHUNKMODE.PAGE
        elif access_pattern == 'spatial':
            # Optimize for spatial locality
            chunkmode = tifffile.CHUNKMODE.TILE
        elif access_pattern == 'temporal':
            # Optimize for time series access
            chunkmode = tifffile.CHUNKMODE.FRAME
        else:
            chunkmode = None

        return tif.aszarr(chunkmode=chunkmode)

# Usage for different access patterns
sequential_store = create_optimized_store('data.tif', 'sequential')
spatial_store = create_optimized_store('data.tif', 'spatial')
```

### Memory-Mapped Integration

```python
# Combine memory mapping with Zarr for hybrid access
def hybrid_access(filename):
    # Memory-mapped access for small data
    mmap_data = tifffile.memmap(filename)

    # Zarr store for chunked access to same data
    zarr_store = tifffile.imread(filename, aszarr=True)

    return mmap_data, zarr_store

# Usage
mmap_view, zarr_view = hybrid_access('data.tif')

# Direct memory access
small_region = mmap_view[100:200, 100:200]

# Chunked access for large operations
import dask.array as da
large_computation = da.from_zarr(zarr_view).sum().compute()
```

## Performance Considerations

### Chunk Size Optimization

```python
import numpy as np

# Determine optimal chunk size based on data characteristics
def optimize_chunks(store, memory_limit_mb=100):
    """Calculate optimal chunk size for given memory limit."""

    dtype_size = np.dtype(store.dtype).itemsize
    max_elements = (memory_limit_mb * 1024 * 1024) // dtype_size

    # Calculate chunk dimensions
    if len(store.shape) == 2:
        side_length = int(np.sqrt(max_elements))
        chunk_shape = (min(side_length, store.shape[0]),
                       min(side_length, store.shape[1]))
    elif len(store.shape) == 3:
        # For 3D data, keep reasonable z-dimension
        z_chunk = min(10, store.shape[0])
        xy_elements = max_elements // z_chunk
        xy_side = int(np.sqrt(xy_elements))
        chunk_shape = (z_chunk,
                       min(xy_side, store.shape[1]),
                       min(xy_side, store.shape[2]))

    return chunk_shape
```

### I/O Optimization

```python
# Optimize I/O for large file sequences
def efficient_sequence_processing(pattern, process_func):
    """Process file sequence with optimized I/O."""

    # Create store with optimized settings
    store = tifffile.imread(
        pattern,
        aszarr=True,
        maxworkers=4,  # Parallel file reading
        chunkmode='page'  # Page-based chunking
    )

    # Process with Dask for memory efficiency
    import dask.array as da
    dask_array = da.from_zarr(store)

    # Apply processing function
    result = dask_array.map_blocks(
        process_func,
        dtype=dask_array.dtype,
        drop_axis=None
    )

    return result.compute()
```

## Error Handling

Common Zarr integration error scenarios:

```python
try:
    store = tifffile.imread('large.tif', aszarr=True)
    data = store[1000:2000, 1000:2000]
except MemoryError:
    # Fall back to smaller chunks
    print("Memory error, using smaller chunks")
    store = tifffile.imread('large.tif', aszarr=True, chunkmode='tile')
    data = store[1000:2000, 1000:2000]
except ValueError as e:
    print(f"Invalid chunk access: {e}")
except IOError as e:
    print(f"File access error: {e}")
```

449

## Dependencies

450

451

Zarr integration requires additional packages:

452

453

```bash

454

# Install with Zarr support

455

pip install tifffile[zarr]

456

457

# Or install dependencies manually

458

pip install zarr>=3 fsspec kerchunk

459

```

460

461

The Zarr integration provides seamless compatibility with the broader Python scientific computing ecosystem, enabling efficient processing of large scientific imaging datasets in cloud-native workflows.