Read and write TIFF files for scientific and bioimaging applications with comprehensive format support
Zarr store implementations for cloud-native access to TIFF files and file sequences, enabling scalable processing of large scientific datasets without loading entire files into memory. These stores provide compatibility with the Zarr ecosystem for distributed computing and analysis workflows.
Note: These classes are available in the tifffile.zarr module, not the main tifffile module.
from tifffile.zarr import ZarrTiffStore, ZarrFileSequenceStore, ZarrStore

Zarr store interface for individual TIFF files, providing chunked access to image data with lazy loading and memory-efficient processing.
class ZarrTiffStore:
    """Zarr store interface for a single TIFF file.

    Exposes the image data of one TIFF file to the Zarr ecosystem for
    chunked, lazy access without loading the whole file into memory.
    """

    def __init__(
        self,
        tifffile,
        *,
        key=None,
        series=None,
        level=None,
        chunkmode=None,
        fillvalue=None,
        zattrs=None,
        **kwargs
    ):
        """
        Initialize Zarr store for TIFF file.

        Parameters:
        - tifffile: TiffFile instance or file path
        - key: int, slice, or sequence of page indices
        - series: int, series index for multi-series files
        - level: int, pyramid level for multi-resolution files
        - chunkmode: CHUNKMODE enum, chunking strategy
        - fillvalue: numeric, fill value for missing data
        - zattrs: dict, additional Zarr attributes
        """

    def __getitem__(self, key):
        """Get data chunk by key (a Zarr chunk key, not an array slice)."""

    def __setitem__(self, key, value):
        """Set data chunk (read-only store, raises NotImplementedError)."""

    def __contains__(self, key):
        """Check if key exists in store."""

    def __iter__(self):
        """Iterate over store keys."""

    def keys(self):
        """Return all keys in store."""

    @property
    def shape(self):
        """tuple: Shape of the array."""

    @property
    def dtype(self):
        """np.dtype: Data type of array elements."""

    @property
    def chunks(self):
        """tuple: Chunk dimensions."""


# Create Zarr store from TIFF file
with tifffile.TiffFile('large.tif') as tif:
    store = tif.aszarr()
    print(f"Shape: {store.shape}")
    print(f"Chunks: {store.chunks}")
    # Access data chunks. The store itself is keyed by Zarr chunk keys
    # (see ZarrTiffStore.__getitem__), so array-style slicing must go
    # through a zarr array opened on the store.
    import zarr
    z_array = zarr.open(store, mode='r')
    chunk = z_array[0:1000, 0:1000]

# Direct creation from file path
store = tifffile.imread('huge.tif', aszarr=True)

# Use with Zarr array
import zarr
z_array = zarr.open(store, mode='r')
print(f"Zarr array: {z_array}")

# Process in chunks with Dask
import dask.array as da
dask_array = da.from_zarr(store)
result = dask_array.mean(axis=0).compute()

Zarr store for sequences of TIFF files, treating multiple files as a single logical array with an additional time or sequence dimension.
class ZarrFileSequenceStore:
    """Zarr store for a sequence of TIFF files.

    Presents multiple files as one logical array with an additional
    leading time/sequence dimension.
    """

    def __init__(
        self,
        files,
        *,
        imread=None,
        pattern=None,
        axesorder=None,
        categories=None,
        chunkmode=None,
        fillvalue=None,
        zattrs=None,
        **kwargs
    ):
        """
        Initialize Zarr store for file sequence.

        Parameters:
        - files: sequence of file paths or glob pattern
        - imread: callable, custom function for reading files
        - pattern: str, glob pattern for file matching
        - axesorder: sequence of ints, axis reordering
        - categories: dict, categorical data mappings
        - chunkmode: CHUNKMODE enum, chunking strategy
        - fillvalue: numeric, fill value for missing data
        - zattrs: dict, additional Zarr attributes
        """

    def __getitem__(self, key):
        """Get data chunk by key (a Zarr chunk key, per the store protocol)."""

    def __setitem__(self, key, value):
        """Set data chunk (read-only store, raises NotImplementedError)."""

    @property
    def shape(self):
        """tuple: Shape including sequence dimension."""

    @property
    def dtype(self):
        """np.dtype: Data type of array elements."""

    @property
    def chunks(self):
        """tuple: Chunk dimensions."""

    @property
    def files(self):
        """list: File paths in sequence."""


# Create store from file list
sequence_files = ['img001.tif', 'img002.tif', 'img003.tif']
store = tifffile.imread(sequence_files, aszarr=True)
print(f"Sequence shape: {store.shape}")  # (3, height, width)

# A glob pattern works the same way
store = tifffile.imread('timeseries_*.tif', aszarr=True)

# Index specific timepoints
# NOTE(review): direct indexing of the store assumes it supports
# array-style access; the class stub documents key-based access only --
# confirm against the implementation.
timepoint_0 = store[0]          # First file
timepoint_slice = store[10:20]  # Files 10-19

# Open through zarr for time series analysis
import zarr
z_array = zarr.open(store, mode='r')
time_series = z_array[:, 100, 100]  # Pixel time series

Base class for Zarr store implementations, providing common functionality.
class ZarrStore:
    """Base class for Zarr store implementations.

    Provides shared lifetime management (close / context manager) and
    identifying properties for the concrete store classes.
    """

    def __init__(self, **kwargs):
        """Initialize base Zarr store."""

    def close(self):
        """Close the store and release resources."""

    def __enter__(self):
        """Context manager entry."""

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""

    @property
    def path(self):
        """str: Store path or identifier."""

    @property
    def mode(self):
        """str: Store access mode."""


# Process extremely large TIFF files
def process_large_tiff(filename, chunk_size=1024):
    """Yield ``((y, x), processed_chunk)`` tiles of *filename*.

    Generator: the TIFF file stays open while the caller iterates, so
    each tile is read lazily from disk.

    Parameters:
    - filename: str, path of the TIFF file
    - chunk_size: int, side length of each processed tile
    """
    with tifffile.TiffFile(filename) as tif:
        store = tif.aszarr()
        # Array-style slicing must go through a zarr array; the store
        # itself maps Zarr chunk keys (see ZarrTiffStore.__getitem__).
        import zarr
        z_array = zarr.open(store, mode='r')
        # Process in chunks to avoid memory issues
        for y in range(0, store.shape[0], chunk_size):
            for x in range(0, store.shape[1], chunk_size):
                y_end = min(y + chunk_size, store.shape[0])
                x_end = min(x + chunk_size, store.shape[1])
                chunk = z_array[y:y_end, x:x_end]
                # Process chunk...
                processed = chunk * 2  # Example processing
                yield (y, x), processed

# Usage
for (y, x), result in process_large_tiff('huge.tif'):
    print(f"Processed chunk at ({y}, {x})")

# Access different pyramid levels
with tifffile.TiffFile('pyramid.tif') as tif:
    # Level 0 is the full-resolution image; higher levels are the
    # progressively downsampled pyramid levels.
    full_res = tif.aszarr(level=0)
    level_1 = tif.aszarr(level=1)
    level_2 = tif.aszarr(level=2)
    print(f"Level 0: {full_res.shape}")
    print(f"Level 1: {level_1.shape}")
    print(f"Level 2: {level_2.shape}")

import zarr
import numpy as np

# Analyze time series data
files = [f'timeseries_{i:03d}.tif' for i in range(100)]
store = tifffile.imread(files, aszarr=True)
z_array = zarr.open(store, mode='r')

# Projections over the time axis (axis 0)
mean_projection = np.mean(z_array, axis=0)
max_projection = np.max(z_array, axis=0)
std_projection = np.std(z_array, axis=0)

# Pixel-wise statistics over a 100x100 region of interest
roi_time_series = z_array[:, 100:200, 100:200]
roi_mean = np.mean(roi_time_series, axis=(1, 2))  # Mean over ROI per timepoint

import dask.array as da
from dask.distributed import Client

# Connect to a Dask scheduler for distributed processing
client = Client('scheduler-address:8786')

# Expose the TIFF data as a Dask array through its Zarr store
store = tifffile.imread('large_dataset.tif', aszarr=True)
dask_array = da.from_zarr(store, chunks=(1000, 1000))

# Apply an element-wise transform lazily on each block
result = dask_array.map_blocks(
    lambda x: x * 2 + 1,  # Example processing function
    dtype=dask_array.dtype,
)

# Trigger the distributed computation and clean up
output = result.compute()
client.close()

import fsspec
import zarr

# Access TIFF files from cloud storage
def cloud_tiff_store(url, storage_options=None):
    """Create an in-memory Zarr store for a cloud-hosted TIFF file.

    Parameters:
    - url: str, object-store URL of the TIFF file (e.g. 's3://bucket/key')
    - storage_options: dict or None, fsspec credentials/options

    Returns the populated zarr.MemoryStore. Note that the whole image
    is copied into memory, so this is only suitable for files that
    fit in RAM.
    """
    # Open file from cloud storage
    fs = fsspec.filesystem('s3', **storage_options or {})
    with fs.open(url, 'rb') as f:
        with tifffile.TiffFile(f) as tif:
            # Create local Zarr store
            store = zarr.MemoryStore()
            zarr_array = zarr.open(store, mode='w',
                                   shape=tif.pages[0].shape,
                                   dtype=tif.pages[0].dtype,
                                   chunks=(1024, 1024))
            # Copy the pixel data. The original sliced the Zarr *store*
            # (`tif.aszarr()[:]`), but a store maps chunk keys, not
            # array slices; read the image data directly instead.
            zarr_array[:] = tif.asarray()
    return store

# Usage
storage_opts = {'key': 'access_key', 'secret': 'secret_key'}
store = cloud_tiff_store('s3://bucket/large_image.tif', storage_opts)

# Optimize chunking for specific access patterns
def create_optimized_store(filename, access_pattern='sequential'):
    """Return a Zarr store for *filename* chunked for *access_pattern*.

    The TiffFile is intentionally NOT opened in a ``with`` block: the
    returned store reads lazily from the file, so closing the file on
    return (as the original did) would leave the store backed by a
    closed handle. The caller owns the file's lifetime.

    Parameters:
    - filename: str, path of the TIFF file
    - access_pattern: str, one of 'sequential', 'spatial', 'temporal';
      any other value selects the default chunking
    """
    tif = tifffile.TiffFile(filename)
    if access_pattern == 'sequential':
        # Optimize for row-wise access
        chunkmode = tifffile.CHUNKMODE.PAGE
    elif access_pattern == 'spatial':
        # Optimize for spatial locality
        # NOTE(review): confirm CHUNKMODE.TILE exists; it is not
        # documented by the stubs above.
        chunkmode = tifffile.CHUNKMODE.TILE
    elif access_pattern == 'temporal':
        # Optimize for time series access
        # NOTE(review): confirm CHUNKMODE.FRAME exists.
        chunkmode = tifffile.CHUNKMODE.FRAME
    else:
        chunkmode = None
    return tif.aszarr(chunkmode=chunkmode)

# Usage for different access patterns
sequential_store = create_optimized_store('data.tif', 'sequential')
spatial_store = create_optimized_store('data.tif', 'spatial')

# Combine memory mapping with Zarr for hybrid access
def hybrid_access(filename):
    """Return a memory-mapped view and a Zarr store for the same file."""
    # Memory mapping gives cheap direct access to small regions
    mmap_data = tifffile.memmap(filename)
    # The Zarr store gives chunked access to the identical data
    zarr_store = tifffile.imread(filename, aszarr=True)
    return mmap_data, zarr_store

# Usage
mmap_view, zarr_view = hybrid_access('data.tif')

# Small regions: read straight from the memory map
small_region = mmap_view[100:200, 100:200]

# Large operations: go through Dask on the Zarr store
import dask.array as da
large_computation = da.from_zarr(zarr_view).sum().compute()

# Determine optimal chunk size based on data characteristics
def optimize_chunks(store, memory_limit_mb=100):
    """Calculate an optimal chunk shape for the given memory limit.

    Parameters:
    - store: object exposing ``shape`` and ``dtype`` attributes
      (e.g. a ZarrTiffStore)
    - memory_limit_mb: int, upper bound for one chunk, in MiB

    Returns a tuple chunk shape for 2-D or 3-D data.

    Raises ValueError for any other rank. (The original left
    ``chunk_shape`` unbound in that case and raised UnboundLocalError.)
    """
    dtype_size = np.dtype(store.dtype).itemsize
    max_elements = (memory_limit_mb * 1024 * 1024) // dtype_size
    shape = store.shape
    if len(shape) == 2:
        # Square tiles that fit within the memory budget
        side_length = int(np.sqrt(max_elements))
        return (min(side_length, shape[0]),
                min(side_length, shape[1]))
    if len(shape) == 3:
        # For 3D data, keep a modest z-dimension so xy tiles stay large
        z_chunk = min(10, shape[0])
        xy_elements = max_elements // z_chunk
        xy_side = int(np.sqrt(xy_elements))
        return (z_chunk,
                min(xy_side, shape[1]),
                min(xy_side, shape[2]))
    raise ValueError(f'cannot compute chunks for {len(shape)}-D data')

# Optimize I/O for large file sequences
def efficient_sequence_processing(pattern, process_func):
    """Process file sequence with optimized I/O."""
    # Read the sequence lazily, with parallel file reads and
    # page-based chunking.
    store = tifffile.imread(
        pattern,
        aszarr=True,
        maxworkers=4,   # Parallel file reading
        chunkmode='page',  # Page-based chunking
    )
    # Dask keeps memory bounded by working block by block
    import dask.array as da
    blocks = da.from_zarr(store)
    # Apply the caller-supplied function to every block
    mapped = blocks.map_blocks(
        process_func,
        dtype=blocks.dtype,
        drop_axis=None,
    )
    return mapped.compute()

# Common Zarr integration error scenarios:
# Handle common failure modes when reading through a Zarr store.
try:
    store = tifffile.imread('large.tif', aszarr=True)
    # NOTE(review): array-style slicing of the store assumes it is
    # supported; the class stubs document key-based access -- confirm.
    data = store[1000:2000, 1000:2000]
except MemoryError:
    # Fall back to smaller chunks
    # NOTE(review): 'tile' is assumed to be a valid chunkmode value --
    # confirm against the CHUNKMODE enum.
    print("Memory error, using smaller chunks")
    store = tifffile.imread('large.tif', aszarr=True, chunkmode='tile')
    data = store[1000:2000, 1000:2000]
except ValueError as e:
    print(f"Invalid chunk access: {e}")
except IOError as e:  # IOError is an alias of OSError in Python 3
    print(f"File access error: {e}")
print(f"File access error: {e}")

Zarr integration requires additional packages:
# Install with Zarr support
pip install tifffile[zarr]
# Or install dependencies manually
pip install zarr>=3 fsspec kerchunk

The Zarr integration provides seamless compatibility with the broader Python scientific computing ecosystem, enabling efficient processing of large scientific imaging datasets in cloud-native workflows.
Install with Tessl CLI
npx tessl i tessl/pypi-tifffile