Read and write TIFF files for scientific and bioimaging applications with comprehensive format support
Zarr store implementations for cloud-native access to TIFF files and file sequences, enabling scalable processing of large scientific datasets without loading entire files into memory. These stores provide compatibility with the Zarr ecosystem for distributed computing and analysis workflows.
Note: These classes are available in the tifffile.zarr module, not the main tifffile module.
from tifffile.zarr import ZarrTiffStore, ZarrFileSequenceStore, ZarrStore

Zarr store interface for individual TIFF files, providing chunked access to image data with lazy loading and memory-efficient processing.
class ZarrTiffStore:
    """Zarr store interface for a single TIFF file.

    Exposes the image data of one TIFF file to the Zarr ecosystem for
    chunked, lazy access without loading the whole file into memory.
    """

    def __init__(
        self,
        tifffile,
        *,
        key=None,
        series=None,
        level=None,
        chunkmode=None,
        fillvalue=None,
        zattrs=None,
        **kwargs
    ):
        """
        Initialize Zarr store for TIFF file.

        Parameters:
        - tifffile: TiffFile instance or file path
        - key: int, slice, or sequence of page indices
        - series: int, series index for multi-series files
        - level: int, pyramid level for multi-resolution files
        - chunkmode: CHUNKMODE enum, chunking strategy
        - fillvalue: numeric, fill value for missing data
        - zattrs: dict, additional Zarr attributes
        """

    def __getitem__(self, key):
        """Get data chunk by key (a Zarr chunk key, not an array slice)."""

    def __setitem__(self, key, value):
        """Set data chunk (read-only store, raises NotImplementedError)."""

    def __contains__(self, key):
        """Check if key exists in store."""

    def __iter__(self):
        """Iterate over store keys."""

    def keys(self):
        """Return all keys in store."""

    @property
    def shape(self):
        """tuple: Shape of the array."""

    @property
    def dtype(self):
        """np.dtype: Data type of array elements."""

    @property
    def chunks(self):
        """tuple: Chunk dimensions."""


# Create Zarr store from TIFF file
with tifffile.TiffFile('large.tif') as tif:
    store = tif.aszarr()
    print(f"Shape: {store.shape}")
    print(f"Chunks: {store.chunks}")
    # Access data chunks. The store itself is keyed by Zarr chunk keys
    # (see ZarrTiffStore.__getitem__), so array-style slicing must go
    # through a zarr array opened on the store.
    import zarr
    z_array = zarr.open(store, mode='r')
    chunk = z_array[0:1000, 0:1000]

# Direct creation from file path
store = tifffile.imread('huge.tif', aszarr=True)

# Use with Zarr array
import zarr
z_array = zarr.open(store, mode='r')
print(f"Zarr array: {z_array}")

# Process in chunks with Dask
import dask.array as da
dask_array = da.from_zarr(store)
result = dask_array.mean(axis=0).compute()

Zarr store for sequences of TIFF files, treating multiple files as a single logical array with an additional time or sequence dimension.
class ZarrFileSequenceStore:
    """Zarr store for a sequence of TIFF files.

    Presents multiple files as one logical array with an additional
    leading time/sequence dimension.
    """

    def __init__(
        self,
        files,
        *,
        imread=None,
        pattern=None,
        axesorder=None,
        categories=None,
        chunkmode=None,
        fillvalue=None,
        zattrs=None,
        **kwargs
    ):
        """
        Initialize Zarr store for file sequence.

        Parameters:
        - files: sequence of file paths or glob pattern
        - imread: callable, custom function for reading files
        - pattern: str, glob pattern for file matching
        - axesorder: sequence of ints, axis reordering
        - categories: dict, categorical data mappings
        - chunkmode: CHUNKMODE enum, chunking strategy
        - fillvalue: numeric, fill value for missing data
        - zattrs: dict, additional Zarr attributes
        """

    def __getitem__(self, key):
        """Get data chunk by key (a Zarr chunk key, per the store protocol)."""

    def __setitem__(self, key, value):
        """Set data chunk (read-only store, raises NotImplementedError)."""

    @property
    def shape(self):
        """tuple: Shape including sequence dimension."""

    @property
    def dtype(self):
        """np.dtype: Data type of array elements."""

    @property
    def chunks(self):
        """tuple: Chunk dimensions."""

    @property
    def files(self):
        """list: File paths in sequence."""


# Create store from file list
sequence_files = ['img001.tif', 'img002.tif', 'img003.tif']
store = tifffile.imread(sequence_files, aszarr=True)
print(f"Sequence shape: {store.shape}")  # (3, height, width)

# A glob pattern works the same way
store = tifffile.imread('timeseries_*.tif', aszarr=True)

# Index specific timepoints
# NOTE(review): direct indexing of the store assumes it supports
# array-style access; the class stub documents key-based access only --
# confirm against the implementation.
timepoint_0 = store[0]          # First file
timepoint_slice = store[10:20]  # Files 10-19

# Open through zarr for time series analysis
import zarr
z_array = zarr.open(store, mode='r')
time_series = z_array[:, 100, 100]  # Pixel time series

Base class for Zarr store implementations, providing common functionality.
class ZarrStore:
    """Base class for Zarr store implementations.

    Provides shared lifetime management (close / context manager) and
    identifying properties for the concrete store classes.
    """

    def __init__(self, **kwargs):
        """Initialize base Zarr store."""

    def close(self):
        """Close the store and release resources."""

    def __enter__(self):
        """Context manager entry."""

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""

    @property
    def path(self):
        """str: Store path or identifier."""

    @property
    def mode(self):
        """str: Store access mode."""


# Process extremely large TIFF files
def process_large_tiff(filename, chunk_size=1024):
    """Yield ``((y, x), processed_chunk)`` tiles of *filename*.

    Generator: the TIFF file stays open while the caller iterates, so
    each tile is read lazily from disk.

    Parameters:
    - filename: str, path of the TIFF file
    - chunk_size: int, side length of each processed tile
    """
    with tifffile.TiffFile(filename) as tif:
        store = tif.aszarr()
        # Array-style slicing must go through a zarr array; the store
        # itself maps Zarr chunk keys (see ZarrTiffStore.__getitem__).
        import zarr
        z_array = zarr.open(store, mode='r')
        # Process in chunks to avoid memory issues
        for y in range(0, store.shape[0], chunk_size):
            for x in range(0, store.shape[1], chunk_size):
                y_end = min(y + chunk_size, store.shape[0])
                x_end = min(x + chunk_size, store.shape[1])
                chunk = z_array[y:y_end, x:x_end]
                # Process chunk...
                processed = chunk * 2  # Example processing
                yield (y, x), processed

# Usage
for (y, x), result in process_large_tiff('huge.tif'):
    print(f"Processed chunk at ({y}, {x})")

# Access different pyramid levels
with tifffile.TiffFile('pyramid.tif') as tif:
    # Level 0 is the full-resolution image; higher levels are the
    # progressively downsampled pyramid levels.
    full_res = tif.aszarr(level=0)
    level_1 = tif.aszarr(level=1)
    level_2 = tif.aszarr(level=2)
    print(f"Level 0: {full_res.shape}")
    print(f"Level 1: {level_1.shape}")
    print(f"Level 2: {level_2.shape}")

import zarr
import numpy as np

# Analyze time series data
files = [f'timeseries_{i:03d}.tif' for i in range(100)]
store = tifffile.imread(files, aszarr=True)
z_array = zarr.open(store, mode='r')

# Projections over the time axis (axis 0)
mean_projection = np.mean(z_array, axis=0)
max_projection = np.max(z_array, axis=0)
std_projection = np.std(z_array, axis=0)

# Pixel-wise statistics over a 100x100 region of interest
roi_time_series = z_array[:, 100:200, 100:200]
roi_mean = np.mean(roi_time_series, axis=(1, 2))  # Mean over ROI per timepoint

import dask.array as da
from dask.distributed import Client

# Connect to a Dask scheduler for distributed processing
client = Client('scheduler-address:8786')

# Expose the TIFF data as a Dask array through its Zarr store
store = tifffile.imread('large_dataset.tif', aszarr=True)
dask_array = da.from_zarr(store, chunks=(1000, 1000))

# Apply an element-wise transform lazily on each block
result = dask_array.map_blocks(
    lambda x: x * 2 + 1,  # Example processing function
    dtype=dask_array.dtype,
)

# Trigger the distributed computation and clean up
output = result.compute()
client.close()

import fsspec
import zarr

# Access TIFF files from cloud storage
def cloud_tiff_store(url, storage_options=None):
    """Create an in-memory Zarr store for a cloud-hosted TIFF file.

    Parameters:
    - url: str, object-store URL of the TIFF file (e.g. 's3://bucket/key')
    - storage_options: dict or None, fsspec credentials/options

    Returns the populated zarr.MemoryStore. Note that the whole image
    is copied into memory, so this is only suitable for files that
    fit in RAM.
    """
    # Open file from cloud storage
    fs = fsspec.filesystem('s3', **storage_options or {})
    with fs.open(url, 'rb') as f:
        with tifffile.TiffFile(f) as tif:
            # Create local Zarr store
            store = zarr.MemoryStore()
            zarr_array = zarr.open(store, mode='w',
                                   shape=tif.pages[0].shape,
                                   dtype=tif.pages[0].dtype,
                                   chunks=(1024, 1024))
            # Copy the pixel data. The original sliced the Zarr *store*
            # (`tif.aszarr()[:]`), but a store maps chunk keys, not
            # array slices; read the image data directly instead.
            zarr_array[:] = tif.asarray()
    return store

# Usage
storage_opts = {'key': 'access_key', 'secret': 'secret_key'}
store = cloud_tiff_store('s3://bucket/large_image.tif', storage_opts)

# Optimize chunking for specific access patterns
def create_optimized_store(filename, access_pattern='sequential'):
    """Return a Zarr store for *filename* chunked for *access_pattern*.

    The TiffFile is intentionally NOT opened in a ``with`` block: the
    returned store reads lazily from the file, so closing the file on
    return (as the original did) would leave the store backed by a
    closed handle. The caller owns the file's lifetime.

    Parameters:
    - filename: str, path of the TIFF file
    - access_pattern: str, one of 'sequential', 'spatial', 'temporal';
      any other value selects the default chunking
    """
    tif = tifffile.TiffFile(filename)
    if access_pattern == 'sequential':
        # Optimize for row-wise access
        chunkmode = tifffile.CHUNKMODE.PAGE
    elif access_pattern == 'spatial':
        # Optimize for spatial locality
        # NOTE(review): confirm CHUNKMODE.TILE exists; it is not
        # documented by the stubs above.
        chunkmode = tifffile.CHUNKMODE.TILE
    elif access_pattern == 'temporal':
        # Optimize for time series access
        # NOTE(review): confirm CHUNKMODE.FRAME exists.
        chunkmode = tifffile.CHUNKMODE.FRAME
    else:
        chunkmode = None
    return tif.aszarr(chunkmode=chunkmode)

# Usage for different access patterns
sequential_store = create_optimized_store('data.tif', 'sequential')
spatial_store = create_optimized_store('data.tif', 'spatial')

# Combine memory mapping with Zarr for hybrid access
def hybrid_access(filename):
    """Return a memory-mapped view and a Zarr store for the same file."""
    # Memory mapping gives cheap direct access to small regions
    mmap_data = tifffile.memmap(filename)
    # The Zarr store gives chunked access to the identical data
    zarr_store = tifffile.imread(filename, aszarr=True)
    return mmap_data, zarr_store

# Usage
mmap_view, zarr_view = hybrid_access('data.tif')

# Small regions: read straight from the memory map
small_region = mmap_view[100:200, 100:200]

# Large operations: go through Dask on the Zarr store
import dask.array as da
large_computation = da.from_zarr(zarr_view).sum().compute()

# Determine optimal chunk size based on data characteristics
def optimize_chunks(store, memory_limit_mb=100):
    """Calculate an optimal chunk shape for the given memory limit.

    Parameters:
    - store: object exposing ``shape`` and ``dtype`` attributes
      (e.g. a ZarrTiffStore)
    - memory_limit_mb: int, upper bound for one chunk, in MiB

    Returns a tuple chunk shape for 2-D or 3-D data.

    Raises ValueError for any other rank. (The original left
    ``chunk_shape`` unbound in that case and raised UnboundLocalError.)
    """
    dtype_size = np.dtype(store.dtype).itemsize
    max_elements = (memory_limit_mb * 1024 * 1024) // dtype_size
    shape = store.shape
    if len(shape) == 2:
        # Square tiles that fit within the memory budget
        side_length = int(np.sqrt(max_elements))
        return (min(side_length, shape[0]),
                min(side_length, shape[1]))
    if len(shape) == 3:
        # For 3D data, keep a modest z-dimension so xy tiles stay large
        z_chunk = min(10, shape[0])
        xy_elements = max_elements // z_chunk
        xy_side = int(np.sqrt(xy_elements))
        return (z_chunk,
                min(xy_side, shape[1]),
                min(xy_side, shape[2]))
    raise ValueError(f'cannot compute chunks for {len(shape)}-D data')

# Optimize I/O for large file sequences
def efficient_sequence_processing(pattern, process_func):
    """Process file sequence with optimized I/O."""
    # Read the sequence lazily, with parallel file reads and
    # page-based chunking.
    store = tifffile.imread(
        pattern,
        aszarr=True,
        maxworkers=4,   # Parallel file reading
        chunkmode='page',  # Page-based chunking
    )
    # Dask keeps memory bounded by working block by block
    import dask.array as da
    blocks = da.from_zarr(store)
    # Apply the caller-supplied function to every block
    mapped = blocks.map_blocks(
        process_func,
        dtype=blocks.dtype,
        drop_axis=None,
    )
    return mapped.compute()

# Common Zarr integration error scenarios:
# Handle common failure modes when reading through a Zarr store.
try:
    store = tifffile.imread('large.tif', aszarr=True)
    # NOTE(review): array-style slicing of the store assumes it is
    # supported; the class stubs document key-based access -- confirm.
    data = store[1000:2000, 1000:2000]
except MemoryError:
    # Fall back to smaller chunks
    # NOTE(review): 'tile' is assumed to be a valid chunkmode value --
    # confirm against the CHUNKMODE enum.
    print("Memory error, using smaller chunks")
    store = tifffile.imread('large.tif', aszarr=True, chunkmode='tile')
    data = store[1000:2000, 1000:2000]
except ValueError as e:
    print(f"Invalid chunk access: {e}")
except IOError as e:  # IOError is an alias of OSError in Python 3
    print(f"File access error: {e}")
print(f"File access error: {e}")

Zarr integration requires additional packages:
# Install with Zarr support
pip install tifffile[zarr]
# Or install dependencies manually
pip install zarr>=3 fsspec kerchunk

The Zarr integration provides seamless compatibility with the broader Python scientific computing ecosystem, enabling efficient processing of large scientific imaging datasets in cloud-native workflows.
Install with Tessl CLI
npx tessl i tessl/pypi-tifffile