0
# Utility Functions
1
2
Helper functions for file operations, data conversion, string processing, array manipulation, and scientific image format utilities that support the core TIFF functionality. These utilities provide essential tools for working with scientific imaging data and file formats.
3
4
## Capabilities
5
6
### File and Data Utilities
7
8
Core utilities for file operations and data handling.
9
10
```python { .api }
11
def format_size(size):
    """
    Format byte size as human-readable string.

    Parameters:
    - size: int, size in bytes

    Returns:
    - str: Formatted size string (e.g., "1.5 MB", "2.3 GB")
    """


def hexdump(data, width=16, height=16):
    """
    Return hexadecimal dump of binary data.

    Parameters:
    - data: bytes, binary data to dump
    - width: int, number of bytes per line
    - height: int, maximum number of lines

    Returns:
    - str: Hexadecimal representation with ASCII preview
    """


def askopenfilename(**kwargs):
    """
    Open file dialog for selecting TIFF files.

    Parameters:
    - **kwargs: additional arguments for dialog

    Returns:
    - str: Selected file path or None if cancelled
    """


def create_output(shape, dtype, **kwargs):
    """
    Create output array with specified shape and dtype.

    Parameters:
    - shape: tuple, array dimensions
    - dtype: dtype, data type for array
    - **kwargs: additional arguments for array creation

    Returns:
    - np.ndarray: Pre-allocated output array
    """
58
```
59
60
#### Usage Examples
61
62
```python
63
# Format file sizes
size = 1024 * 1024 * 3 // 2  # 1.5 MB as an integer byte count
formatted = tifffile.format_size(size)
print(formatted)  # "1.5 MB"

# Hexdump binary data
with open('image.tif', 'rb') as f:
    header = f.read(64)
print(tifffile.hexdump(header))

# Interactive file selection
filename = tifffile.askopenfilename(
    title='Select TIFF file',
    filetypes=[('TIFF files', '*.tif *.tiff')]
)

# Pre-allocate output array
output = tifffile.create_output((1000, 1000), np.uint16)
81
```
82
83
### Data Type and Conversion Utilities
84
85
Functions for handling data type conversions and array operations.
86
87
```python { .api }
88
def astype(data, dtype, **kwargs):
    """
    Convert array to specified data type with optimizations.

    Parameters:
    - data: array-like, input data
    - dtype: dtype, target data type
    - **kwargs: additional conversion arguments

    Returns:
    - np.ndarray: Converted array
    """


def product(iterable):
    """
    Calculate product of all elements in iterable.

    Parameters:
    - iterable: sequence of numbers

    Returns:
    - numeric: Product of all elements
    """


def repeat_nd(array, repeats, axis=None):
    """
    Repeat array elements along specified axis.

    Parameters:
    - array: array-like, input array
    - repeats: int or sequence, number of repetitions
    - axis: int, axis along which to repeat

    Returns:
    - np.ndarray: Array with repeated elements
    """
124
```
125
126
#### Usage Examples
127
128
```python
129
# Type conversion with optimization
data = np.random.random((100, 100))
uint16_data = tifffile.astype(data, np.uint16, scale=True)

# Calculate array size
shape = (10, 20, 30)
total_elements = tifffile.product(shape)  # 6000

# Repeat array elements
arr = np.array([1, 2, 3])
repeated = tifffile.repeat_nd(arr, [2, 3, 1])  # [1, 1, 2, 2, 2, 3]
140
```
141
142
### Array Manipulation Utilities
143
144
Functions for reshaping and manipulating multi-dimensional arrays.
145
146
```python { .api }
147
def reshape_axes(axes, shape, **kwargs):
    """
    Reshape array axes based on axis labels.

    Parameters:
    - axes: str, axis labels (e.g., 'TZCYX')
    - shape: tuple, array dimensions
    - **kwargs: additional reshape arguments

    Returns:
    - tuple: New shape and axis mapping
    """


def reshape_nd(array, shape, **kwargs):
    """
    Reshape N-dimensional array with advanced options.

    Parameters:
    - array: array-like, input array
    - shape: tuple, target shape
    - **kwargs: reshape options

    Returns:
    - np.ndarray: Reshaped array
    """


def transpose_axes(axes, source, target):
    """
    Calculate transpose order for axis transformation.

    Parameters:
    - axes: str, current axis labels
    - source: str, source axis order
    - target: str, target axis order

    Returns:
    - tuple: Transpose indices
    """
185
```
186
187
#### Usage Examples
188
189
```python
190
# Reshape with axis labels
axes = 'TZCYX'
shape = (10, 5, 3, 100, 100)
new_shape, mapping = tifffile.reshape_axes(axes, shape)

# Advanced array reshaping
data = np.random.random((10, 100, 100))
reshaped = tifffile.reshape_nd(data, (5, 2, 100, 100))

# Calculate transpose for axis reordering
transpose_order = tifffile.transpose_axes('TZCYX', 'TZCYX', 'CTZYX')
# The order covers five axes, so it must be applied to a 5-D array;
# transposing the 3-D `data` above would raise ValueError.
volume = np.random.random(shape)
transposed = volume.transpose(transpose_order)
202
```
203
204
### String Processing Utilities
205
206
Functions for handling strings and text processing.
207
208
```python { .api }
209
def natural_sorted(iterable, key=None, **kwargs):
    """
    Sort strings in natural order (handles numbers correctly).

    Parameters:
    - iterable: sequence of strings to sort
    - key: function, key extraction function
    - **kwargs: additional sort arguments

    Returns:
    - list: Naturally sorted strings
    """


def matlabstr2py(matlab_string):
    """
    Convert MATLAB string representation to Python string.

    Parameters:
    - matlab_string: str, MATLAB-formatted string

    Returns:
    - str: Python-compatible string
    """


def strptime(time_string, format_string):
    """
    Parse time string using specified format.

    Parameters:
    - time_string: str, time representation
    - format_string: str, parsing format

    Returns:
    - datetime: Parsed datetime object
    """


def stripnull(string):
    """
    Remove null characters from string (deprecated).

    Parameters:
    - string: str, input string

    Returns:
    - str: String with null characters removed
    """
255
```
256
257
#### Usage Examples
258
259
```python
260
# Natural sorting of filenames
files = ['img1.tif', 'img10.tif', 'img2.tif', 'img20.tif']
sorted_files = tifffile.natural_sorted(files)
# Result: ['img1.tif', 'img2.tif', 'img10.tif', 'img20.tif']

# Convert MATLAB strings
matlab_str = "{'channel1', 'channel2', 'channel3'}"
python_list = tifffile.matlabstr2py(matlab_str)

# Parse time strings
time_str = "2023-12-25 14:30:00"
parsed_time = tifffile.strptime(time_str, "%Y-%m-%d %H:%M:%S")
272
```
273
274
### File Sequence Utilities
275
276
Functions for working with sequences of files.
277
278
```python { .api }
279
def parse_filenames(pattern, **kwargs):
    """
    Parse filename patterns and extract sequence information.

    Parameters:
    - pattern: str, glob pattern or filename template
    - **kwargs: parsing options

    Returns:
    - list: Parsed filename information
    """


def parse_kwargs(kwargs, **defaults):
    """
    Parse keyword arguments with default values.

    Parameters:
    - kwargs: dict, input keyword arguments
    - **defaults: default values for arguments

    Returns:
    - dict: Processed keyword arguments
    """


def update_kwargs(target, source, **kwargs):
    """
    Update keyword arguments dictionary.

    Parameters:
    - target: dict, target dictionary to update
    - source: dict, source dictionary with new values
    - **kwargs: additional keyword arguments

    Returns:
    - dict: Updated dictionary
    """
315
```
316
317
#### Usage Examples
318
319
```python
320
# Parse filename sequences
pattern = 'experiment_t{t:03d}_c{c:02d}.tif'
filenames = tifffile.parse_filenames(pattern, t=range(10), c=range(3))

# Process keyword arguments
defaults = {'compression': 'lzw', 'photometric': 'minisblack'}
kwargs = {'compression': 'deflate'}
processed = tifffile.parse_kwargs(kwargs, **defaults)

# Update argument dictionaries
base_args = {'mode': 'w', 'bigtiff': False}
new_args = {'bigtiff': True, 'compression': 'lzw'}
updated = tifffile.update_kwargs(base_args, new_args)
333
```
334
335
### Formatting and Display Utilities
336
337
Functions for data formatting and display.
338
339
```python { .api }
340
def pformat(data, **kwargs):
    """
    Pretty-format data structures for display.

    Parameters:
    - data: any, data to format
    - **kwargs: formatting options

    Returns:
    - str: Formatted string representation
    """


def enumarg(enum_class, arg):
    """
    Convert argument to enum member.

    Parameters:
    - enum_class: enum class
    - arg: str, int, or enum member

    Returns:
    - enum member
    """


def enumstr(enum_class, *args, **kwargs):
    """
    Return string representation of enum values.

    Parameters:
    - enum_class: enum class
    - *args, **kwargs: enum values

    Returns:
    - str: Formatted enum string
    """
375
```
376
377
#### Usage Examples
378
379
```python
380
# Pretty-format complex data
metadata = {'shape': (100, 100), 'dtype': 'uint8', 'compression': 'lzw'}
formatted = tifffile.pformat(metadata, indent=2)
print(formatted)

# Work with enums
compression = tifffile.enumarg(tifffile.COMPRESSION, 'lzw')
comp_str = tifffile.enumstr(tifffile.COMPRESSION, compression)
388
```
389
390
### XML and Metadata Utilities
391
392
Functions for processing XML and metadata formats.
393
394
```python { .api }
395
def xml2dict(xml_string, **kwargs):
    """
    Convert XML string to dictionary representation.

    Parameters:
    - xml_string: str, XML content
    - **kwargs: parsing options

    Returns:
    - dict: XML data as nested dictionary
    """


def validate_jhove(filename, **kwargs):
    """
    Validate TIFF file using JHOVE-compatible rules.

    Parameters:
    - filename: str, path to TIFF file
    - **kwargs: validation options

    Returns:
    - dict: Validation results and issues
    """
418
```
419
420
### File Management Classes
421
422
Advanced classes for managing file handles, caches, and sequences.
423
424
```python { .api }
425
class FileCache:
    """Cache of open file handles."""

    def __init__(self, maxsize=128):
        """
        Initialize file handle cache.

        Parameters:
        - maxsize: int, maximum number of cached file handles
        """

    def open(self, filename, mode='rb'):
        """
        Open file with caching.

        Parameters:
        - filename: str, path to file
        - mode: str, file opening mode

        Returns:
        - file handle: Cached file handle
        """

    def close(self, filename=None):
        """
        Close cached file handles.

        Parameters:
        - filename: str, specific file to close (None for all)
        """
453
454
class FileSequence:
    """Handler for a sequence of files matched by a pattern."""

    def __init__(self, pattern, **kwargs):
        """
        Initialize file sequence handler.

        Parameters:
        - pattern: str, glob pattern for file matching
        - **kwargs: sequence configuration options
        """

    @property
    def files(self):
        """list: Files in sequence."""

    @property
    def shape(self):
        """tuple: Combined shape of sequence."""
471
472
class StoredShape:
    """Normalized representation of an array shape."""

    def __init__(self, shape, **kwargs):
        """
        Initialize normalized shape representation.

        Parameters:
        - shape: tuple, array dimensions
        - **kwargs: shape normalization options
        """

    def __str__(self):
        """Return string representation of shape."""

    @property
    def ndim(self):
        """int: Number of dimensions."""
488
489
class TiledSequence:
    """Handler for a sequence of files arranged as tiles."""

    def __init__(self, files, tile_shape, **kwargs):
        """
        Initialize tiled file sequence handler.

        Parameters:
        - files: list, sequence of file paths
        - tile_shape: tuple, tile dimensions
        - **kwargs: tiling configuration options
        """

    def get_tile(self, tile_index):
        """
        Get specific tile from sequence.

        Parameters:
        - tile_index: int or tuple, tile coordinate

        Returns:
        - array: Tile data
        """
510
511
class TiffFormat:
    """TIFF format specification (byte order and classic/BigTIFF variant)."""

    def __init__(self, byteorder='<', bigtiff=False):
        """
        Initialize TIFF format specification.

        Parameters:
        - byteorder: str, byte order ('<', '>')
        - bigtiff: bool, use BigTIFF format
        """

    @property
    def signature(self):
        """bytes: TIFF format signature."""

    @property
    def version(self):
        """int: TIFF version number."""
528
```
529
530
#### Usage Examples (XML and Metadata Utilities)
531
532
```python
533
# Parse XML metadata
xml_content = """
<metadata>
    <acquisition>
        <channels>3</channels>
        <frames>100</frames>
    </acquisition>
</metadata>
"""
metadata_dict = tifffile.xml2dict(xml_content)

# Validate TIFF file
validation_result = tifffile.validate_jhove('image.tif')
if validation_result.get('valid', False):
    print("TIFF file is valid")
else:
    print("Issues found:", validation_result.get('issues', []))
550
```
551
552
### Context Managers and Timing
553
554
Utility classes for resource management and performance monitoring.
555
556
```python { .api }
557
class Timer:
    """Context manager for timing a block of code."""

    def __enter__(self):
        """Start timing context."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """End timing context."""

    @property
    def elapsed(self):
        """float: Elapsed time in seconds."""
568
569
class NullContext:
    """Context manager that does nothing on entry or exit."""

    def __enter__(self):
        """No-op context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """No-op context manager exit."""
576
577
def nullfunc(*args, **kwargs):
    """
    No-operation function that accepts any arguments.

    Returns:
    - None
    """
584
```
585
586
#### Usage Examples
587
588
```python
589
# Time operations
with tifffile.Timer() as timer:
    data = tifffile.imread('large.tif')
    processed = data * 2
print(f"Processing took {timer.elapsed:.2f} seconds")

# Conditional context management
use_timer = True
context = tifffile.Timer() if use_timer else tifffile.NullContext()

with context as ctx:
    # Do work
    pass

if hasattr(ctx, 'elapsed'):
    print(f"Elapsed: {ctx.elapsed:.2f}s")
605
```
606
607
### Logging Utilities
608
609
Functions for logging and debugging.
610
611
```python { .api }
612
def logger():
    """
    Get logger instance for tifffile operations.

    Returns:
    - logging.Logger: Configured logger instance
    """
619
```
620
621
#### Usage Examples
622
623
```python
624
import logging

# Set up logging
log = tifffile.logger()
log.setLevel(logging.DEBUG)

log.info("Starting TIFF processing")
try:
    data = tifffile.imread('image.tif')
    log.debug(f"Loaded image with shape {data.shape}")
except Exception as e:
    log.error(f"Failed to load image: {e}")
634
```
635
636
## Advanced Usage Patterns
637
638
### Batch File Processing
639
640
```python
641
def process_tiff_batch(input_pattern, output_dir, process_func):
    """
    Process multiple TIFF files in batch.

    Parameters:
    - input_pattern: str, glob pattern selecting the input files
    - output_dir: str, directory receiving the results (created if missing)
    - process_func: callable, takes the image array of one file and
      returns the array to write

    Each result is written to output_dir as "processed_<input basename>".
    """
    import glob
    import os

    # Writing the first result would fail if the directory did not exist.
    os.makedirs(output_dir, exist_ok=True)

    for filename in tifffile.natural_sorted(glob.glob(input_pattern)):
        basename = os.path.basename(filename)
        output_path = os.path.join(output_dir, f"processed_{basename}")

        with tifffile.Timer() as timer:
            data = tifffile.imread(filename)
            processed = process_func(data)
            tifffile.imwrite(output_path, processed)

        size_str = tifffile.format_size(os.path.getsize(filename))
        print(f"Processed {basename} ({size_str}) in {timer.elapsed:.2f}s")
659
```
660
661
### Metadata Extraction Pipeline
662
663
```python
664
def extract_comprehensive_metadata(filename):
    """
    Extract all available metadata from TIFF file.

    Parameters:
    - filename: str, path to TIFF file

    Returns:
    - dict: file information ('filename', 'file_size', 'pages'), first-page
      properties ('shape', 'dtype', 'compression', 'photometric', 'tags')
      and, when present, 'ome', 'imagej' and 'lsm' metadata
    """
    import os  # local import keeps the example self-contained

    metadata = {}

    with tifffile.TiffFile(filename) as tif:
        # Basic file information
        metadata['filename'] = filename
        metadata['file_size'] = tifffile.format_size(os.path.getsize(filename))
        metadata['pages'] = len(tif.pages)

        # Page-level metadata
        page = tif.pages[0]
        metadata['shape'] = page.shape
        metadata['dtype'] = str(page.dtype)
        metadata['compression'] = tifffile.enumstr(tifffile.COMPRESSION, page.compression)
        metadata['photometric'] = tifffile.enumstr(tifffile.PHOTOMETRIC, page.photometric)

        # Format-specific metadata
        if tif.ome_metadata:
            metadata['ome'] = tif.ome_metadata
        if tif.imagej_metadata:
            metadata['imagej'] = tif.imagej_metadata
        if tif.lsm_metadata:
            metadata['lsm'] = tif.lsm_metadata

        # All tags, keyed by the tag's own name rather than by whatever key
        # tags.items() yields.
        # NOTE(review): recent tifffile releases key items() by numeric tag
        # code - confirm against the installed version.
        metadata['tags'] = {tag.name: tag.value for tag in page.tags.values()}

    return metadata
693
```
694
695
### Custom Data Validation
696
697
```python
698
def validate_scientific_tiff(filename, requirements):
    """
    Validate TIFF file against scientific imaging requirements.

    Parameters:
    - filename: str, path to TIFF file
    - requirements: dict with optional keys 'min_dimensions' (int),
      'allowed_dtypes' (container of dtypes) and 'required_compression'

    Returns:
    - dict: {'valid': bool, 'issues': list}; 'valid' is True only when no
      issue was recorded. Exceptions raised during the checks are recorded
      as an issue instead of propagating.
    """
    issues = []

    try:
        with tifffile.TiffFile(filename) as tif:
            page = tif.pages[0]

            # Check shape requirements
            if 'min_dimensions' in requirements:
                min_dims = requirements['min_dimensions']
                if len(page.shape) < min_dims:
                    issues.append(f"Insufficient dimensions: {len(page.shape)} < {min_dims}")

            # Check data type requirements
            if 'allowed_dtypes' in requirements:
                if page.dtype not in requirements['allowed_dtypes']:
                    issues.append(f"Invalid dtype: {page.dtype}")

            # Check compression requirements
            if 'required_compression' in requirements:
                required = requirements['required_compression']
                if page.compression != required:
                    issues.append(f"Wrong compression: {page.compression} != {required}")

        # Use JHOVE validation
        jhove_result = tifffile.validate_jhove(filename)
        if not jhove_result.get('valid', True):
            issues.extend(jhove_result.get('issues', []))

    except Exception as e:
        # Deliberately broad: a validator reports failures, it does not raise.
        issues.append(f"File access error: {e}")

    return {'valid': not issues, 'issues': issues}
732
```
733
734
## Performance Optimization
735
736
### Memory-Efficient Processing
737
738
```python
739
def memory_efficient_conversion(input_file, output_file, process_func, chunk_size=1024):
    """
    Convert large TIFF files by applying process_func chunk by chunk.

    Parameters:
    - input_file: str, path to the source TIFF file (first page is used)
    - output_file: str, path of the TIFF file to write
    - process_func: callable, maps an array chunk to a processed chunk of
      the same shape
    - chunk_size: int, edge length of the square chunks along the first
      two axes

    Note: the decoded source page and the complete output array are both
    held in memory; chunking only bounds the size of the arrays handed to
    process_func.
    """
    with tifffile.TiffFile(input_file) as tif:
        page = tif.pages[0]
        output_shape = page.shape
        # Probe process_func with a tiny array to learn the result dtype.
        output_dtype = process_func(np.array([[0]], dtype=page.dtype)).dtype

        # Decode the page once; calling asarray() inside the loops would
        # decode the whole page again for every chunk.
        source = page.asarray()

    # Create output array
    output = tifffile.create_output(output_shape, output_dtype)

    # Process in chunks
    for y in range(0, output_shape[0], chunk_size):
        y_end = min(y + chunk_size, output_shape[0])

        for x in range(0, output_shape[1], chunk_size):
            x_end = min(x + chunk_size, output_shape[1])

            # Process chunk and store result
            output[y:y_end, x:x_end] = process_func(source[y:y_end, x:x_end])

    # Write output
    tifffile.imwrite(output_file, output)
768
```
769
770
These utilities provide essential functionality for scientific image processing workflows, enabling efficient handling of complex TIFF files and metadata in research and production environments.