tessl/pypi-tifffile

Read and write TIFF files for scientific and bioimaging applications with comprehensive format support

Overview

Eval results

Files

Utility Functions

Name: tessl/pypi-tifffile
Author: tessl

This section documents the helper functions that support the core TIFF functionality: file operations, data conversion, string processing, array manipulation, and scientific image format utilities. Together they provide the basic tools for working with scientific imaging data and file formats.

Capabilities

File and Data Utilities

Core utilities for file operations and data handling.

def format_size(size: int) -> str:
    """
    Format a byte count as a human-readable string.

    Parameters:
    - size: int, size in bytes

    Returns:
    - str: Formatted size string with a unit suffix (e.g., "1.5 MB", "2.3 GB")
    """

def hexdump(data: bytes, width: int = 16, height: int = 16) -> str:
    """
    Return hexadecimal dump of binary data.

    Parameters:
    - data: bytes, binary data to dump
    - width: int, number of bytes per line (default 16)
    - height: int, maximum number of lines (default 16)

    Returns:
    - str: Hexadecimal representation with ASCII preview
    """

def askopenfilename(**kwargs) -> "str | None":
    """
    Open a file dialog for selecting TIFF files.

    Parameters:
    - **kwargs: additional arguments for the dialog

    Returns:
    - str: Selected file path, or None if the dialog was cancelled
    """

def create_output(shape: tuple, dtype, **kwargs) -> "np.ndarray":
    """
    Create an output array with the specified shape and dtype.

    Parameters:
    - shape: tuple, array dimensions
    - dtype: dtype, data type for the array
    - **kwargs: additional arguments for array creation

    Returns:
    - np.ndarray: Pre-allocated output array
    """

Usage Examples

# NOTE(review): `tifffile` and `np` are used below without being imported in
# this snippet; presumably `import tifffile` and `import numpy as np` are
# expected beforehand - confirm.

# Format file sizes
size = 1024 * 1024 * 1.5  # 1.5 MB in bytes (a float, because of the 1.5 factor)
formatted = tifffile.format_size(size)
print(formatted)  # "1.5 MB"

# Hexdump binary data
with open('image.tif', 'rb') as f:
    # Only the first 64 bytes of the file are read and dumped
    header = f.read(64)
    print(tifffile.hexdump(header))

# Interactive file selection
# The keyword arguments are forwarded to the dialog
filename = tifffile.askopenfilename(
    title='Select TIFF file',
    filetypes=[('TIFF files', '*.tif *.tiff')]
)

# Pre-allocate output array
output = tifffile.create_output((1000, 1000), np.uint16)

Data Type and Conversion Utilities

Functions for handling data type conversions and array operations.

def astype(data, dtype, **kwargs) -> "np.ndarray":
    """
    Convert an array to the specified data type with optimizations.

    Parameters:
    - data: array-like, input data
    - dtype: dtype, target data type
    - **kwargs: additional conversion arguments

    Returns:
    - np.ndarray: Converted array
    """

def product(iterable):
    """
    Calculate the product of all elements in an iterable.

    Parameters:
    - iterable: sequence of numbers

    Returns:
    - numeric: Product of all elements (e.g., 6000 for (10, 20, 30))
    """

def repeat_nd(array, repeats, axis=None) -> "np.ndarray":
    """
    Repeat array elements along the specified axis.

    Parameters:
    - array: array-like, input array
    - repeats: int or sequence, number of repetitions
    - axis: int, axis along which to repeat (default None)

    Returns:
    - np.ndarray: Array with repeated elements
    """

Usage Examples

# NOTE(review): assumes `import tifffile` and `import numpy as np` have been
# executed beforehand - neither is imported in this snippet; confirm.

# Type conversion with optimization
data = np.random.random((100, 100))
# `scale=True` is passed through as one of astype's extra keyword arguments
uint16_data = tifffile.astype(data, np.uint16, scale=True)

# Calculate array size
shape = (10, 20, 30)
total_elements = tifffile.product(shape)  # 6000

# Repeat array elements
arr = np.array([1, 2, 3])
# One repeat count per element of `arr`
repeated = tifffile.repeat_nd(arr, [2, 3, 1])  # [1, 1, 2, 2, 2, 3]

Array Manipulation Utilities

Functions for reshaping and manipulating multi-dimensional arrays.

def reshape_axes(axes: str, shape: tuple, **kwargs) -> tuple:
    """
    Reshape array axes based on axis labels.

    Parameters:
    - axes: str, axis labels (e.g., 'TZCYX')
    - shape: tuple, array dimensions
    - **kwargs: additional reshape arguments

    Returns:
    - tuple: New shape and axis mapping
    """

def reshape_nd(array, shape: tuple, **kwargs) -> "np.ndarray":
    """
    Reshape an N-dimensional array with advanced options.

    Parameters:
    - array: array-like, input array
    - shape: tuple, target shape
    - **kwargs: reshape options

    Returns:
    - np.ndarray: Reshaped array
    """

def transpose_axes(axes: str, source: str, target: str) -> tuple:
    """
    Calculate the transpose order for an axis transformation.

    Parameters:
    - axes: str, current axis labels
    - source: str, source axis order
    - target: str, target axis order

    Returns:
    - tuple: Transpose indices
    """

Usage Examples

# NOTE(review): assumes `import tifffile` and `import numpy as np` have been
# executed beforehand - neither is imported in this snippet; confirm.

# Reshape with axis labels
axes = 'TZCYX'
shape = (10, 5, 3, 100, 100)
new_shape, mapping = tifffile.reshape_axes(axes, shape)

# Advanced array reshaping
data = np.random.random((10, 100, 100))
reshaped = tifffile.reshape_nd(data, (5, 2, 100, 100))

# Calculate transpose for axis reordering
transpose_order = tifffile.transpose_axes('TZCYX', 'TZCYX', 'CTZYX')
# The order has one entry per axis label (five here), so it must be applied
# to a five-dimensional array; transposing the 3-D `data` above with it would
# raise ValueError ("axes don't match array").
stack = np.zeros(shape)
transposed = stack.transpose(transpose_order)

String Processing Utilities

Functions for handling strings and text processing.

def natural_sorted(iterable, key=None, **kwargs) -> list:
    """
    Sort strings in natural order, so that embedded numbers compare by value
    (e.g., 'img2.tif' sorts before 'img10.tif').

    Parameters:
    - iterable: sequence of strings to sort
    - key: function, key extraction function (default None)
    - **kwargs: additional sort arguments

    Returns:
    - list: Naturally sorted strings
    """

def matlabstr2py(matlab_string: str):
    """
    Convert MATLAB string representation to Python string.

    Parameters:
    - matlab_string: str, MATLAB-formatted string

    Returns:
    - str: Python-compatible string

    NOTE(review): the accompanying usage example passes a MATLAB cell-array
    string and stores the result in a variable named `python_list`, which
    suggests the result is not always a str - confirm the return type.
    """

def strptime(time_string: str, format_string: str) -> "datetime":
    """
    Parse a time string using the specified format.

    Parameters:
    - time_string: str, time representation (e.g., "2023-12-25 14:30:00")
    - format_string: str, parsing format (e.g., "%Y-%m-%d %H:%M:%S")

    Returns:
    - datetime: Parsed datetime object
    """

def stripnull(string: str) -> str:
    """
    Remove null characters from a string.

    This function is marked as deprecated.

    Parameters:
    - string: str, input string

    Returns:
    - str: String with null characters removed
    """

Usage Examples

# NOTE(review): assumes `import tifffile` has been executed beforehand - it is
# not imported in this snippet; confirm.

# Natural sorting of filenames
files = ['img1.tif', 'img10.tif', 'img2.tif', 'img20.tif']
sorted_files = tifffile.natural_sorted(files)
# Result: ['img1.tif', 'img2.tif', 'img10.tif', 'img20.tif']

# Convert MATLAB strings
# The input is MATLAB cell-array syntax holding three quoted names
matlab_str = "{'channel1', 'channel2', 'channel3'}"
python_list = tifffile.matlabstr2py(matlab_str)

# Parse time strings
# The format string mirrors the layout of `time_str` field by field
time_str = "2023-12-25 14:30:00"
parsed_time = tifffile.strptime(time_str, "%Y-%m-%d %H:%M:%S")

File Sequence Utilities

Functions for working with sequences of files.

def parse_filenames(pattern: str, **kwargs) -> list:
    """
    Parse filename patterns and extract sequence information.

    Parameters:
    - pattern: str, glob pattern or filename template
    - **kwargs: parsing options

    Returns:
    - list: Parsed filename information
    """

def parse_kwargs(kwargs: dict, **defaults) -> dict:
    """
    Parse keyword arguments with default values.

    Parameters:
    - kwargs: dict, input keyword arguments
    - **defaults: default values for arguments

    Returns:
    - dict: Processed keyword arguments
    """

def update_kwargs(target: dict, source: dict, **kwargs) -> dict:
    """
    Update a keyword arguments dictionary.

    Parameters:
    - target: dict, target dictionary to update
    - source: dict, source dictionary with new values
    - **kwargs: additional keyword arguments

    Returns:
    - dict: Updated dictionary
    """

Usage Examples

# NOTE(review): assumes `import tifffile` has been executed beforehand - it is
# not imported in this snippet; confirm.

# Parse filename sequences
# The template uses str.format-style fields: zero-padded `t` (3 digits) and `c` (2 digits)
pattern = 'experiment_t{t:03d}_c{c:02d}.tif'
filenames = tifffile.parse_filenames(pattern, t=range(10), c=range(3))

# Process keyword arguments
# 'compression' appears both in `kwargs` and in the defaults
defaults = {'compression': 'lzw', 'photometric': 'minisblack'}
kwargs = {'compression': 'deflate'}
processed = tifffile.parse_kwargs(kwargs, **defaults)

# Update argument dictionaries
# 'bigtiff' is present in both dictionaries; 'compression' only in `new_args`
base_args = {'mode': 'w', 'bigtiff': False}
new_args = {'bigtiff': True, 'compression': 'lzw'}
updated = tifffile.update_kwargs(base_args, new_args)

Formatting and Display Utilities

Functions for data formatting and display.

def pformat(data, **kwargs) -> str:
    """
    Pretty-format data structures for display.

    Parameters:
    - data: any, data to format
    - **kwargs: formatting options

    Returns:
    - str: Formatted string representation
    """

def enumarg(enum_class, arg):
    """
    Convert an argument to a member of the given enum class.

    Parameters:
    - enum_class: enum class
    - arg: str, int, or enum member

    Returns:
    - enum member
    """

def enumstr(enum_class, *args, **kwargs) -> str:
    """
    Return the string representation of enum values.

    Parameters:
    - enum_class: enum class
    - *args, **kwargs: enum values

    Returns:
    - str: Formatted enum string
    """

Usage Examples

# NOTE(review): assumes `import tifffile` has been executed beforehand - it is
# not imported in this snippet; confirm.

# Pretty-format complex data
metadata = {'shape': (100, 100), 'dtype': 'uint8', 'compression': 'lzw'}
# `indent=2` is passed through as one of pformat's formatting options
formatted = tifffile.pformat(metadata, indent=2)
print(formatted)

# Work with enums
# Resolve the name 'lzw' to an enum member, then render that member as text
compression = tifffile.enumarg(tifffile.COMPRESSION, 'lzw')
comp_str = tifffile.enumstr(tifffile.COMPRESSION, compression)

XML and Metadata Utilities

Functions for processing XML and metadata formats.

def xml2dict(xml_string: str, **kwargs) -> dict:
    """
    Convert an XML string to a dictionary representation.

    Parameters:
    - xml_string: str, XML content
    - **kwargs: parsing options

    Returns:
    - dict: XML data as nested dictionary
    """

def validate_jhove(filename: str, **kwargs) -> dict:
    """
    Validate TIFF file using JHOVE-compatible rules.

    Parameters:
    - filename: str, path to TIFF file
    - **kwargs: validation options

    Returns:
    - dict: Validation results and issues

    NOTE(review): the usage examples read the keys 'valid' and 'issues' from
    the result; confirm this return shape against the installed tifffile
    version before relying on it.
    """

File Management Classes

Advanced classes for managing file handles, caches, and sequences.

class FileCache:
    """Cache of open file handles (only signatures and docstrings are shown)."""

    def __init__(self, maxsize: int = 128) -> None:
        """
        Initialize file handle cache.

        Parameters:
        - maxsize: int, maximum number of cached file handles (default 128)
        """

    def open(self, filename: str, mode: str = 'rb'):
        """
        Open file with caching.

        Parameters:
        - filename: str, path to file
        - mode: str, file opening mode (default 'rb')

        Returns:
        - file handle: Cached file handle
        """

    def close(self, filename=None) -> None:
        """
        Close cached file handles.

        Parameters:
        - filename: str, specific file to close (None for all)
        """

class FileSequence:
    """Handler for a sequence of files (only signatures and docstrings are shown)."""

    def __init__(self, pattern: str, **kwargs) -> None:
        """
        Initialize file sequence handler.

        Parameters:
        - pattern: str, glob pattern for file matching
        - **kwargs: sequence configuration options
        """

    @property
    def files(self) -> list:
        """list: Files in sequence."""

    @property
    def shape(self) -> tuple:
        """tuple: Combined shape of sequence."""

class StoredShape:
    """Normalized shape representation (only signatures and docstrings are shown)."""

    def __init__(self, shape: tuple, **kwargs) -> None:
        """
        Initialize normalized shape representation.

        Parameters:
        - shape: tuple, array dimensions
        - **kwargs: shape normalization options
        """

    def __str__(self) -> str:
        """Return string representation of shape."""

    @property
    def ndim(self) -> int:
        """int: Number of dimensions."""

class TiledSequence:
    """Handler for a tiled sequence of files (only signatures and docstrings are shown)."""

    def __init__(self, files: list, tile_shape: tuple, **kwargs) -> None:
        """
        Initialize tiled file sequence handler.

        Parameters:
        - files: list, sequence of file paths
        - tile_shape: tuple, tile dimensions
        - **kwargs: tiling configuration options
        """

    def get_tile(self, tile_index):
        """
        Get specific tile from sequence.

        Parameters:
        - tile_index: int or tuple, tile coordinate

        Returns:
        - array: Tile data
        """

class TiffFormat:
    """TIFF format specification (only signatures and docstrings are shown)."""

    def __init__(self, byteorder: str = '<', bigtiff: bool = False) -> None:
        """
        Initialize TIFF format specification.

        Parameters:
        - byteorder: str, byte order ('<', '>'); default '<'
        - bigtiff: bool, use BigTIFF format (default False)
        """

    @property
    def signature(self) -> bytes:
        """bytes: TIFF format signature."""

    @property
    def version(self) -> int:
        """int: TIFF version number."""

Usage Examples

# NOTE(review): assumes `import tifffile` has been executed beforehand - it is
# not imported in this snippet; confirm.

# Parse XML metadata
xml_content = """
<metadata>
    <acquisition>
        <channels>3</channels>
        <frames>100</frames>
    </acquisition>
</metadata>
"""
metadata_dict = tifffile.xml2dict(xml_content)

# Validate TIFF file
validation_result = tifffile.validate_jhove('image.tif')
# A missing 'valid' key is treated as "not valid" here (default False)
if validation_result.get('valid', False):
    print("TIFF file is valid")
else:
    # A missing 'issues' key prints as an empty list
    print("Issues found:", validation_result.get('issues', []))

Context Managers and Timing

Utility classes for resource management and performance monitoring.

class Timer:
    """Context manager for timing a block of code (only the interface is shown)."""

    def __enter__(self):
        """Start timing context."""
        # Returning self makes the timer available as the `as` target
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """End timing context."""

    @property
    def elapsed(self) -> float:
        """float: Elapsed time in seconds."""

class NullContext:
    """Context manager that does nothing on entry or exit."""

    def __enter__(self):
        """No-op context manager entry."""
        # Returning self makes the instance available as the `as` target
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """No-op context manager exit."""

def nullfunc(*args, **kwargs) -> None:
    """
    No-operation function that accepts any positional and keyword arguments.

    Returns:
    - None
    """

Usage Examples

# NOTE(review): assumes `import tifffile` has been executed beforehand - it is
# not imported in this snippet; confirm.

# Time operations
with tifffile.Timer() as timer:
    data = tifffile.imread('large.tif')
    processed = data * 2
# `timer` stays bound after the block, so the elapsed time can be read here
print(f"Processing took {timer.elapsed:.2f} seconds")

# Conditional context management
use_timer = True
context = tifffile.Timer() if use_timer else tifffile.NullContext()

with context as ctx:
    # Do work
    pass

# NullContext defines no `elapsed` attribute, so check before reading it
if hasattr(ctx, 'elapsed'):
    print(f"Elapsed: {ctx.elapsed:.2f}s")

Logging Utilities

Functions for logging and debugging.

def logger() -> "logging.Logger":
    """
    Get the logger instance used for tifffile operations.

    Returns:
    - logging.Logger: Configured logger instance
    """

Usage Examples

# `logging` supplies the DEBUG level constant used below
# NOTE(review): `tifffile` is assumed to be imported already - confirm.
import logging

# Set up logging
log = tifffile.logger()
log.setLevel(logging.DEBUG)

log.info("Starting TIFF processing")
try:
    data = tifffile.imread('image.tif')
    log.debug(f"Loaded image with shape {data.shape}")
except Exception as e:
    # Any failure while reading is logged instead of being propagated
    log.error(f"Failed to load image: {e}")

Advanced Usage Patterns

Batch File Processing

def process_tiff_batch(input_pattern, output_dir, process_func):
    """Apply ``process_func`` to every TIFF file matching a glob pattern.

    Parameters:
    - input_pattern: str, glob pattern selecting the input files
    - output_dir: str, directory that receives the results; each result is
      written as ``processed_<original basename>``
    - process_func: callable, receives the array read from a file and returns
      the array to write

    Files are handled in natural sort order. One progress line per file is
    printed with the input file size and the time spent reading, processing
    and writing it.
    """
    import glob
    import os

    matches = glob.glob(input_pattern)

    for source_path in tifffile.natural_sorted(matches):
        name = os.path.basename(source_path)
        destination = os.path.join(output_dir, f"processed_{name}")

        # Reading, processing and writing are all inside the timed region
        with tifffile.Timer() as stopwatch:
            image = tifffile.imread(source_path)
            result = process_func(image)
            tifffile.imwrite(destination, result)

        input_size = tifffile.format_size(os.path.getsize(source_path))
        print(f"Processed {name} ({input_size}) in {stopwatch.elapsed:.2f}s")

Metadata Extraction Pipeline

def extract_comprehensive_metadata(filename):
    """Extract all available metadata from TIFF file.

    Parameters:
    - filename: str, path to the TIFF file

    Returns:
    - dict: always contains 'filename', 'file_size', 'pages', 'shape',
      'dtype', 'compression', 'photometric' and 'tags'; 'ome', 'imagej' and
      'lsm' are added only when the corresponding metadata is present

    Page-level values are taken from the first page only.
    """
    # Imported locally, like process_tiff_batch does, so the function does not
    # depend on a module-level `import os` that this file never shows.
    import os

    metadata = {}

    with tifffile.TiffFile(filename) as tif:
        # Basic file information
        metadata['filename'] = filename
        metadata['file_size'] = tifffile.format_size(os.path.getsize(filename))
        metadata['pages'] = len(tif.pages)

        # Page-level metadata
        page = tif.pages[0]
        metadata['shape'] = page.shape
        metadata['dtype'] = str(page.dtype)
        metadata['compression'] = tifffile.enumstr(tifffile.COMPRESSION, page.compression)
        metadata['photometric'] = tifffile.enumstr(tifffile.PHOTOMETRIC, page.photometric)

        # Format-specific metadata (skipped when empty or None)
        if tif.ome_metadata:
            metadata['ome'] = tif.ome_metadata
        if tif.imagej_metadata:
            metadata['imagej'] = tif.imagej_metadata
        if tif.lsm_metadata:
            metadata['lsm'] = tif.lsm_metadata

        # All tags
        # NOTE(review): presumably the keys yielded by `page.tags.items()` are
        # tag names; verify against the installed tifffile version.
        metadata['tags'] = {name: tag.value for name, tag in page.tags.items()}

    return metadata

Custom Data Validation

def validate_scientific_tiff(filename, requirements):
    """Validate TIFF file against scientific imaging requirements.

    Parameters:
    - filename: str, path to the TIFF file to check
    - requirements: dict, may contain any of the keys 'min_dimensions'
      (minimum number of dimensions), 'allowed_dtypes' (container of accepted
      dtypes) and 'required_compression' (value the page compression must
      equal); missing keys disable the corresponding check

    Returns:
    - dict: {'valid': bool, 'issues': list}; 'valid' is True only when no
      issue was recorded

    Only the first page is inspected. Exceptions are not propagated: any
    Exception raised while checking is reported as an entry in 'issues'.
    """
    issues = []

    try:
        with tifffile.TiffFile(filename) as tif:
            page = tif.pages[0]

            # Check shape requirements
            if 'min_dimensions' in requirements:
                min_dims = requirements['min_dimensions']
                if len(page.shape) < min_dims:
                    issues.append(f"Insufficient dimensions: {len(page.shape)} < {min_dims}")

            # Check data type requirements
            if 'allowed_dtypes' in requirements:
                if page.dtype not in requirements['allowed_dtypes']:
                    issues.append(f"Invalid dtype: {page.dtype}")

            # Check compression requirements
            if 'required_compression' in requirements:
                required = requirements['required_compression']
                if page.compression != required:
                    issues.append(f"Wrong compression: {page.compression} != {required}")

            # Use JHOVE validation. Two result conventions are tolerated: a
            # dict with 'valid'/'issues' keys, or a validator that returns
            # None on success and raises ValueError on failure. Calling .get
            # on None would otherwise surface as a bogus "File access error".
            try:
                jhove_result = tifffile.validate_jhove(filename)
            except ValueError as e:
                issues.append(f"JHOVE validation failed: {e}")
            else:
                if isinstance(jhove_result, dict) and not jhove_result.get('valid', True):
                    issues.extend(jhove_result.get('issues', []))

    except Exception as e:
        issues.append(f"File access error: {e}")

    return {'valid': not issues, 'issues': issues}

Performance Optimization

Memory-Efficient Processing

def memory_efficient_conversion(input_file, output_file, process_func, chunk_size=1024):
    """Convert a TIFF file by applying ``process_func`` tile by tile.

    Parameters:
    - input_file: str, path of the TIFF file to read (first page only)
    - output_file: str, path of the TIFF file to write
    - process_func: callable, maps an array tile to a processed tile of the
      same shape; it is also called once on a 1x1 probe array to determine
      the output dtype
    - chunk_size: int, tile edge length along the first two axes (default 1024)

    Raises:
    - ValueError: if chunk_size is not positive

    The decoded page and the full output array are both held in memory;
    chunking only bounds the size of the arrays handed to ``process_func``.
    """
    # A zero step makes range() fail with an unrelated message and a negative
    # step would skip every tile, silently writing an unfilled output.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    with tifffile.TiffFile(input_file) as tif:
        page = tif.pages[0]
        output_shape = page.shape
        output_dtype = process_func(np.array([[0]], dtype=page.dtype)).dtype

        # Create output array
        output = tifffile.create_output(output_shape, output_dtype)

        # Decode the page once; calling asarray() per tile would re-read and
        # re-decode the entire page for every chunk.
        image = page.asarray()

        # Process in chunks
        for y in range(0, output_shape[0], chunk_size):
            y_end = min(y + chunk_size, output_shape[0])

            for x in range(0, output_shape[1], chunk_size):
                x_end = min(x + chunk_size, output_shape[1])

                # Read chunk, process it and store the result
                output[y:y_end, x:x_end] = process_func(image[y:y_end, x:x_end])

        # Write output
        tifffile.imwrite(output_file, output)

These utilities provide essential functionality for scientific image processing workflows, enabling efficient handling of complex TIFF files and metadata in research and production environments.

Install with Tessl CLI