0
# Component Filtering
1
2
Advanced filtering functions for manipulating connected components by size and properties, including dust removal to clean up small artifacts and extraction of the largest components to focus on primary structures.
3
4
## Capabilities
5
6
### Dust Removal
7
8
Remove small connected components ("dust") from images with flexible threshold options and efficient processing modes. Supports both size-based filtering and range-based filtering with inversion options.
9
10
```python { .api }
11
def dust(
12
img: NDArray[typing.Any],
13
threshold: Union[int,float,tuple[int,int],tuple[float,float],list[int],list[float]],
14
connectivity: Literal[4,6,8,18,26] = 26,
15
in_place: bool = False,
16
binary_image: bool = False,
17
precomputed_ccl: bool = False,
18
invert: bool = False,
19
return_N: bool = False,
20
) -> Union[NDArray[typing.Any], tuple[NDArray[typing.Any], int]]:
21
"""
22
Remove connected components smaller than threshold ("dust") from the image.
23
24
Parameters:
25
- img: A 2D or 3D image
26
- threshold:
27
int/float: discard components smaller than this in voxels
28
tuple/list: discard components outside range [lower, upper)
29
- connectivity: Connectivity pattern to use for CCL
30
- in_place: Whether to modify the input image or create a copy
31
- binary_image: Treat input as binary image
32
- precomputed_ccl: Input is already a CCL result, skip CCL computation
33
- invert: Invert the selection (scalar: discard components >= threshold instead of < threshold; range: discard components inside [lower, upper) instead of outside it)
34
- return_N: Also return the number of remaining components
35
36
Returns:
37
- NDArray: Cleaned image with dust removed
38
- tuple[NDArray, int]: If return_N=True, includes remaining component count
39
"""
40
```
41
42
Usage examples:
43
44
```python
45
import cc3d
46
import numpy as np
47
48
# Create noisy image with small artifacts
49
labels_in = np.random.randint(0, 100, (100, 100, 100))
50
labels_in = cc3d.connected_components(labels_in) # Get connected components first
51
52
# Basic dust removal - remove components smaller than 50 voxels
53
cleaned = cc3d.dust(labels_in, threshold=50)
54
55
# Remove dust and get count of remaining components
56
cleaned, remaining_count = cc3d.dust(
57
labels_in, threshold=50, return_N=True
58
)
59
print(f"Kept {remaining_count} components after dust removal")
60
61
# Range-based filtering with invert=True - remove components of 100-999 voxels (keep those outside the range)
62
medium_components = cc3d.dust(
63
labels_in, threshold=[100, 1000], invert=True
64
)
65
66
# Keep only components inside a size range (remove very small and very large)
67
filtered = cc3d.dust(
68
labels_in, threshold=[50, 500] # Keep components of 50-499 voxels
69
)
70
71
# In-place modification (memory efficient)
72
original_image = labels_in.copy()
73
cc3d.dust(original_image, threshold=100, in_place=True)
74
75
# Work with binary images
76
binary_input = (labels_in > 0).astype(np.uint8)
77
cleaned_binary = cc3d.dust(
78
binary_input, threshold=200, binary_image=True
79
)
80
81
# Skip CCL computation if input is already connected components
82
cc_labels = cc3d.connected_components(raw_input)
83
cleaned = cc3d.dust(
84
cc_labels, threshold=75, precomputed_ccl=True
85
)
86
87
# Remove large components instead of small ones
88
no_large_components = cc3d.dust(
89
labels_in, threshold=1000, invert=True
90
)
91
```
92
93
### Largest Component Extraction
94
95
Extract the k largest connected components from an image with efficient processing and optional relabeling, useful for focusing analysis on primary structures.
96
97
```python { .api }
98
def largest_k(
99
img: NDArray[typing.Any],
100
k: int,
101
connectivity: Literal[4,6,8,18,26] = 26,
102
delta: Union[int,float] = 0,
103
return_N: bool = False,
104
binary_image: bool = False,
105
precomputed_ccl: bool = False,
106
) -> Union[NDArray[Union[np.bool_,np.uint16,np.uint32,np.uint64]], tuple[NDArray[Union[np.bool_,np.uint16,np.uint32,np.uint64]], int]]:
107
"""
108
Returns the k largest connected components in the image.
109
110
Parameters:
111
- img: Input image
112
- k: Number of largest components to keep (>= 0)
113
- connectivity: Connectivity pattern for CCL
114
- delta: For continuous images, allowed difference in adjacent voxel values
115
- return_N: Return tuple with component count
116
- binary_image: Treat input as binary image
117
- precomputed_ccl: Input is already a CCL result
118
119
Returns:
120
- NDArray: Image containing only the k largest components, relabeled 1 to k
121
- tuple[NDArray, int]: If return_N=True, includes actual number of components
122
"""
123
```
124
125
Usage examples:
126
127
```python
128
import cc3d
129
import numpy as np
130
131
# Create test image with many components
132
labels_in = np.random.randint(0, 50, (200, 200, 200))
133
134
# Get the 5 largest components
135
largest_5 = cc3d.largest_k(labels_in, k=5)
136
137
# Get largest components with count
138
largest_10, actual_count = cc3d.largest_k(
139
labels_in, k=10, return_N=True
140
)
141
print(f"Requested 10, got {actual_count} components")
142
143
# Single largest component
144
largest_1 = cc3d.largest_k(labels_in, k=1)
145
146
# No components (returns zeros)
147
nothing = cc3d.largest_k(labels_in, k=0)
148
149
# All components if k is larger than available
150
all_components = cc3d.largest_k(labels_in, k=1000)
151
152
# Use with continuous value CCL
153
grayscale = np.random.random((100, 100, 100)) * 255
154
largest_continuous = cc3d.largest_k(
155
grayscale, k=3, delta=15, connectivity=6
156
)
157
158
# Binary image processing
159
binary = (labels_in > 25).astype(np.uint8)
160
largest_binary = cc3d.largest_k(
161
binary, k=2, binary_image=True
162
)
163
164
# Skip CCL if already computed
165
cc_labels = cc3d.connected_components(labels_in)
166
largest_from_ccl = cc3d.largest_k(
167
cc_labels, k=3, precomputed_ccl=True
168
)
169
170
# Extract largest components and get original labels
171
original_labels = labels_in.copy()
172
largest = cc3d.largest_k(labels_in, k=5)
173
# Combine with original to preserve original label values
174
original_labels *= (largest > 0)
175
176
# Performance optimization with precomputed CCL and statistics
177
cc_labels = cc3d.connected_components(large_image)
178
stats = cc3d.statistics(cc_labels)
179
component_sizes = stats['voxel_counts'][1:] # Skip background
180
181
# Find which CCL labels (values in cc_labels, not the input's original labels) correspond to the largest components
182
sorted_indices = np.argsort(component_sizes)[-5:] # 5 largest
183
print(f"Original labels of largest components: {sorted_indices + 1}")
184
185
# Extract using precomputed CCL
186
largest_5_optimized = cc3d.largest_k(
187
cc_labels, k=5, precomputed_ccl=True
188
)
189
```
190
191
## Advanced Filtering Patterns
192
193
### Combining Filters
194
195
```python
196
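# Multi-stage filtering: remove dust, then keep largest
# (precomputed_ccl=True is only correct if input_image is already a CCL labeling; otherwise omit it)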
197
cleaned = cc3d.dust(input_image, threshold=100)
198
final = cc3d.largest_k(cleaned, k=10, precomputed_ccl=True)
199
200
# Size range filtering with largest extraction
201
medium_sized = cc3d.dust(input_image, threshold=[200, 2000], invert=True)
202
top_medium = cc3d.largest_k(medium_sized, k=5, precomputed_ccl=True)
203
```
204
205
### Memory-Efficient Processing
206
207
```python
208
# Process large images in-place when possible
209
large_image = load_large_image()
210
211
# In-place dust removal
212
cc3d.dust(large_image, threshold=500, in_place=True)
213
214
# Then extract largest (creates a new array; precomputed_ccl=True is only valid if large_image already holds CCL labels)
215
result = cc3d.largest_k(large_image, k=3, precomputed_ccl=True)
216
```
217
218
### Performance Optimization
219
220
```python
221
# For repeated filtering operations, precompute CCL once
222
cc_labels = cc3d.connected_components(input_image)
223
224
# Apply multiple filters efficiently
225
cleaned = cc3d.dust(cc_labels, threshold=100, precomputed_ccl=True)
226
largest = cc3d.largest_k(cc_labels, k=5, precomputed_ccl=True)
227
medium = cc3d.dust(cc_labels, threshold=[50, 500],
228
invert=True, precomputed_ccl=True)
229
```