Read and write TIFF files for scientific and bioimaging applications with comprehensive format support
Helper functions for file operations, data conversion, string processing, array manipulation, and scientific image format utilities that support the core TIFF functionality. These utilities provide essential tools for working with scientific imaging data and file formats.
Core utilities for file operations and data handling.
def format_size(size):
"""
Format byte size as human-readable string.
Parameters:
- size: int, size in bytes
Returns:
- str: Formatted size string (e.g., "1.5 MB", "2.3 GB")
"""
def hexdump(data, width=16, height=16):
"""
Return hexadecimal dump of binary data.
Parameters:
- data: bytes, binary data to dump
- width: int, number of bytes per line
- height: int, maximum number of lines
Returns:
- str: Hexadecimal representation with ASCII preview
"""
def askopenfilename(**kwargs):
"""
Open file dialog for selecting TIFF files.
Parameters:
- **kwargs: additional arguments for dialog
Returns:
- str: Selected file path or None if cancelled
"""
def create_output(shape, dtype, **kwargs):
"""
Create output array with specified shape and dtype.
Parameters:
- shape: tuple, array dimensions
- dtype: dtype, data type for array
- **kwargs: additional arguments for array creation
Returns:
- np.ndarray: Pre-allocated output array
"""

# Format file sizes
size = 1024 * 1024 * 1.5 # 1.5 MB
formatted = tifffile.format_size(size)
print(formatted) # "1.5 MB"
# Hexdump binary data
with open('image.tif', 'rb') as f:
header = f.read(64)
print(tifffile.hexdump(header))
# Interactive file selection
filename = tifffile.askopenfilename(
title='Select TIFF file',
filetypes=[('TIFF files', '*.tif *.tiff')]
)
# Pre-allocate output array
output = tifffile.create_output((1000, 1000), np.uint16)

Functions for handling data type conversions and array operations.
def astype(data, dtype, **kwargs):
"""
Convert array to specified data type with optimizations.
Parameters:
- data: array-like, input data
- dtype: dtype, target data type
- **kwargs: additional conversion arguments
Returns:
- np.ndarray: Converted array
"""
def product(iterable):
"""
Calculate product of all elements in iterable.
Parameters:
- iterable: sequence of numbers
Returns:
- numeric: Product of all elements
"""
def repeat_nd(array, repeats, axis=None):
"""
Repeat array elements along specified axis.
Parameters:
- array: array-like, input array
- repeats: int or sequence, number of repetitions
- axis: int, axis along which to repeat
Returns:
- np.ndarray: Array with repeated elements
"""

# Type conversion with optimization
data = np.random.random((100, 100))
uint16_data = tifffile.astype(data, np.uint16, scale=True)
# Calculate array size
shape = (10, 20, 30)
total_elements = tifffile.product(shape) # 6000
# Repeat array elements
arr = np.array([1, 2, 3])
repeated = tifffile.repeat_nd(arr, [2, 3, 1]) # [1, 1, 2, 2, 2, 3]

Functions for reshaping and manipulating multi-dimensional arrays.
def reshape_axes(axes, shape, **kwargs):
"""
Reshape array axes based on axis labels.
Parameters:
- axes: str, axis labels (e.g., 'TZCYX')
- shape: tuple, array dimensions
- **kwargs: additional reshape arguments
Returns:
- tuple: New shape and axis mapping
"""
def reshape_nd(array, shape, **kwargs):
"""
Reshape N-dimensional array with advanced options.
Parameters:
- array: array-like, input array
- shape: tuple, target shape
- **kwargs: reshape options
Returns:
- np.ndarray: Reshaped array
"""
def transpose_axes(axes, source, target):
"""
Calculate transpose order for axis transformation.
Parameters:
- axes: str, current axis labels
- source: str, source axis order
- target: str, target axis order
Returns:
- tuple: Transpose indices
"""

# Reshape with axis labels
axes = 'TZCYX'
shape = (10, 5, 3, 100, 100)
new_shape, mapping = tifffile.reshape_axes(axes, shape)
# Advanced array reshaping
data = np.random.random((10, 100, 100))
reshaped = tifffile.reshape_nd(data, (5, 2, 100, 100))
# Calculate transpose for axis reordering
transpose_order = tifffile.transpose_axes('TZCYX', 'TZCYX', 'CTZYX')
transposed = data.transpose(transpose_order)Functions for handling strings and text processing.
def natural_sorted(iterable, key=None, **kwargs):
"""
Sort strings in natural order (handles numbers correctly).
Parameters:
- iterable: sequence of strings to sort
- key: function, key extraction function
- **kwargs: additional sort arguments
Returns:
- list: Naturally sorted strings
"""
def matlabstr2py(matlab_string):
"""
Convert MATLAB string representation to Python string.
Parameters:
- matlab_string: str, MATLAB-formatted string
Returns:
- str: Python-compatible string
"""
def strptime(time_string, format_string):
"""
Parse time string using specified format.
Parameters:
- time_string: str, time representation
- format_string: str, parsing format
Returns:
- datetime: Parsed datetime object
"""
def stripnull(string):
"""
Remove null characters from string (deprecated).
Parameters:
- string: str, input string
Returns:
- str: String with null characters removed
"""

# Natural sorting of filenames
files = ['img1.tif', 'img10.tif', 'img2.tif', 'img20.tif']
sorted_files = tifffile.natural_sorted(files)
# Result: ['img1.tif', 'img2.tif', 'img10.tif', 'img20.tif']
# Convert MATLAB strings
matlab_str = "{'channel1', 'channel2', 'channel3'}"
python_list = tifffile.matlabstr2py(matlab_str)
# Parse time strings
time_str = "2023-12-25 14:30:00"
parsed_time = tifffile.strptime(time_str, "%Y-%m-%d %H:%M:%S")

Functions for working with sequences of files.
def parse_filenames(pattern, **kwargs):
"""
Parse filename patterns and extract sequence information.
Parameters:
- pattern: str, glob pattern or filename template
- **kwargs: parsing options
Returns:
- list: Parsed filename information
"""
def parse_kwargs(kwargs, **defaults):
"""
Parse keyword arguments with default values.
Parameters:
- kwargs: dict, input keyword arguments
- **defaults: default values for arguments
Returns:
- dict: Processed keyword arguments
"""
def update_kwargs(target, source, **kwargs):
"""
Update keyword arguments dictionary.
Parameters:
- target: dict, target dictionary to update
- source: dict, source dictionary with new values
- **kwargs: additional keyword arguments
Returns:
- dict: Updated dictionary
"""

# Parse filename sequences
pattern = 'experiment_t{t:03d}_c{c:02d}.tif'
filenames = tifffile.parse_filenames(pattern, t=range(10), c=range(3))
# Process keyword arguments
defaults = {'compression': 'lzw', 'photometric': 'minisblack'}
kwargs = {'compression': 'deflate'}
processed = tifffile.parse_kwargs(kwargs, **defaults)
# Update argument dictionaries
base_args = {'mode': 'w', 'bigtiff': False}
new_args = {'bigtiff': True, 'compression': 'lzw'}
updated = tifffile.update_kwargs(base_args, new_args)

Functions for data formatting and display.
def pformat(data, **kwargs):
"""
Pretty-format data structures for display.
Parameters:
- data: any, data to format
- **kwargs: formatting options
Returns:
- str: Formatted string representation
"""
def enumarg(enum_class, arg):
"""
Convert argument to enum member.
Parameters:
- enum_class: enum class
- arg: str, int, or enum member
Returns:
- enum member
"""
def enumstr(enum_class, *args, **kwargs):
"""
Return string representation of enum values.
Parameters:
- enum_class: enum class
- *args, **kwargs: enum values
Returns:
- str: Formatted enum string
"""

# Pretty-format complex data
metadata = {'shape': (100, 100), 'dtype': 'uint8', 'compression': 'lzw'}
formatted = tifffile.pformat(metadata, indent=2)
print(formatted)
# Work with enums
compression = tifffile.enumarg(tifffile.COMPRESSION, 'lzw')
comp_str = tifffile.enumstr(tifffile.COMPRESSION, compression)

Functions for processing XML and metadata formats.
def xml2dict(xml_string, **kwargs):
"""
Convert XML string to dictionary representation.
Parameters:
- xml_string: str, XML content
- **kwargs: parsing options
Returns:
- dict: XML data as nested dictionary
"""
def validate_jhove(filename, **kwargs):
"""
Validate TIFF file using JHOVE-compatible rules.
Parameters:
- filename: str, path to TIFF file
- **kwargs: validation options
Returns:
- dict: Validation results and issues
"""

Advanced classes for managing file handles, caches, and sequences.
class FileCache:
def __init__(self, maxsize=128):
"""
Initialize file handle cache.
Parameters:
- maxsize: int, maximum number of cached file handles
"""
def open(self, filename, mode='rb'):
"""
Open file with caching.
Parameters:
- filename: str, path to file
- mode: str, file opening mode
Returns:
- file handle: Cached file handle
"""
def close(self, filename=None):
"""
Close cached file handles.
Parameters:
- filename: str, specific file to close (None for all)
"""
class FileSequence:
def __init__(self, pattern, **kwargs):
"""
Initialize file sequence handler.
Parameters:
- pattern: str, glob pattern for file matching
- **kwargs: sequence configuration options
"""
@property
def files(self):
"""list: Files in sequence."""
@property
def shape(self):
"""tuple: Combined shape of sequence."""
class StoredShape:
def __init__(self, shape, **kwargs):
"""
Initialize normalized shape representation.
Parameters:
- shape: tuple, array dimensions
- **kwargs: shape normalization options
"""
def __str__(self):
"""Return string representation of shape."""
@property
def ndim(self):
"""int: Number of dimensions."""
class TiledSequence:
def __init__(self, files, tile_shape, **kwargs):
"""
Initialize tiled file sequence handler.
Parameters:
- files: list, sequence of file paths
- tile_shape: tuple, tile dimensions
- **kwargs: tiling configuration options
"""
def get_tile(self, tile_index):
"""
Get specific tile from sequence.
Parameters:
- tile_index: int or tuple, tile coordinate
Returns:
- array: Tile data
"""
class TiffFormat:
def __init__(self, byteorder='<', bigtiff=False):
"""
Initialize TIFF format specification.
Parameters:
- byteorder: str, byte order ('<', '>')
- bigtiff: bool, use BigTIFF format
"""
@property
def signature(self):
"""bytes: TIFF format signature."""
@property
def version(self):
"""int: TIFF version number."""

# Parse XML metadata
xml_content = """
<metadata>
<acquisition>
<channels>3</channels>
<frames>100</frames>
</acquisition>
</metadata>
"""
metadata_dict = tifffile.xml2dict(xml_content)
# Validate TIFF file
validation_result = tifffile.validate_jhove('image.tif')
if validation_result.get('valid', False):
print("TIFF file is valid")
else:
print("Issues found:", validation_result.get('issues', []))

Utility classes for resource management and performance monitoring.
class Timer:
def __enter__(self):
"""Start timing context."""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""End timing context."""
@property
def elapsed(self):
"""float: Elapsed time in seconds."""
class NullContext:
def __enter__(self):
"""No-op context manager entry."""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""No-op context manager exit."""
def nullfunc(*args, **kwargs):
"""
No-operation function that accepts any arguments.
Returns:
- None
"""

# Time operations
with tifffile.Timer() as timer:
data = tifffile.imread('large.tif')
processed = data * 2
print(f"Processing took {timer.elapsed:.2f} seconds")
# Conditional context management
use_timer = True
context = tifffile.Timer() if use_timer else tifffile.NullContext()
with context as ctx:
# Do work
pass
if hasattr(ctx, 'elapsed'):
print(f"Elapsed: {ctx.elapsed:.2f}s")

Functions for logging and debugging.
def logger():
"""
Get logger instance for tifffile operations.
Returns:
- logging.Logger: Configured logger instance
"""

# Set up logging
log = tifffile.logger()
log.setLevel(logging.DEBUG)
log.info("Starting TIFF processing")
try:
data = tifffile.imread('image.tif')
log.debug(f"Loaded image with shape {data.shape}")
except Exception as e:
log.error(f"Failed to load image: {e}")def process_tiff_batch(input_pattern, output_dir, process_func):
"""Process multiple TIFF files in batch."""
import glob
import os
files = tifffile.natural_sorted(glob.glob(input_pattern))
for filename in files:
basename = os.path.basename(filename)
output_path = os.path.join(output_dir, f"processed_{basename}")
with tifffile.Timer() as timer:
data = tifffile.imread(filename)
processed = process_func(data)
tifffile.imwrite(output_path, processed)
size_str = tifffile.format_size(os.path.getsize(filename))
print(f"Processed {basename} ({size_str}) in {timer.elapsed:.2f}s")def extract_comprehensive_metadata(filename):
"""Extract all available metadata from TIFF file."""
metadata = {}
with tifffile.TiffFile(filename) as tif:
# Basic file information
metadata['filename'] = filename
metadata['file_size'] = tifffile.format_size(os.path.getsize(filename))
metadata['pages'] = len(tif.pages)
# Page-level metadata
page = tif.pages[0]
metadata['shape'] = page.shape
metadata['dtype'] = str(page.dtype)
metadata['compression'] = tifffile.enumstr(tifffile.COMPRESSION, page.compression)
metadata['photometric'] = tifffile.enumstr(tifffile.PHOTOMETRIC, page.photometric)
# Format-specific metadata
if tif.ome_metadata:
metadata['ome'] = tif.ome_metadata
if tif.imagej_metadata:
metadata['imagej'] = tif.imagej_metadata
if tif.lsm_metadata:
metadata['lsm'] = tif.lsm_metadata
# All tags
metadata['tags'] = {name: tag.value for name, tag in page.tags.items()}
return metadatadef validate_scientific_tiff(filename, requirements):
"""Validate TIFF file against scientific imaging requirements."""
issues = []
try:
with tifffile.TiffFile(filename) as tif:
page = tif.pages[0]
# Check shape requirements
if 'min_dimensions' in requirements:
min_dims = requirements['min_dimensions']
if len(page.shape) < min_dims:
issues.append(f"Insufficient dimensions: {len(page.shape)} < {min_dims}")
# Check data type requirements
if 'allowed_dtypes' in requirements:
if page.dtype not in requirements['allowed_dtypes']:
issues.append(f"Invalid dtype: {page.dtype}")
# Check compression requirements
if 'required_compression' in requirements:
required = requirements['required_compression']
if page.compression != required:
issues.append(f"Wrong compression: {page.compression} != {required}")
# Use JHOVE validation
jhove_result = tifffile.validate_jhove(filename)
if not jhove_result.get('valid', True):
issues.extend(jhove_result.get('issues', []))
except Exception as e:
issues.append(f"File access error: {e}")
return {'valid': len(issues) == 0, 'issues': issues}def memory_efficient_conversion(input_file, output_file, process_func, chunk_size=1024):
"""Convert large TIFF files with limited memory usage."""
with tifffile.TiffFile(input_file) as tif:
page = tif.pages[0]
output_shape = page.shape
output_dtype = process_func(np.array([[0]], dtype=page.dtype)).dtype
# Create output array
output = tifffile.create_output(output_shape, output_dtype)
# Process in chunks
for y in range(0, output_shape[0], chunk_size):
y_end = min(y + chunk_size, output_shape[0])
for x in range(0, output_shape[1], chunk_size):
x_end = min(x + chunk_size, output_shape[1])
# Read chunk
chunk = page.asarray()[y:y_end, x:x_end]
# Process chunk
processed_chunk = process_func(chunk)
# Store result
output[y:y_end, x:x_end] = processed_chunk
# Write output
tifffile.imwrite(output_file, output)These utilities provide essential functionality for scientific image processing workflows, enabling efficient handling of complex TIFF files and metadata in research and production environments.
Install with Tessl CLI
npx tessl i tessl/pypi-tifffile