High-performance connected components analysis for 2D and 3D multilabel images with support for 26, 18, and 6-connected neighborhoods.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Advanced filtering functions for manipulating connected components by size and properties, including dust removal for cleaning up small artifacts and extraction of the largest components for focus on primary structures.
Remove small connected components ("dust") from images with flexible threshold options and efficient processing modes. Supports both size-based filtering and range-based filtering with inversion options.
def dust(
img: NDArray[typing.Any],
threshold: Union[int,float,tuple[int,int],tuple[float,float],list[int],list[float]],
connectivity: Literal[4,6,8,18,26] = 26,
in_place: bool = False,
binary_image: bool = False,
precomputed_ccl: bool = False,
invert: bool = False,
return_N: bool = False,
) -> Union[NDArray[typing.Any], tuple[NDArray[typing.Any], int]]:
"""
Remove connected components smaller than threshold ("dust") from the image.
Parameters:
- img: A 2D or 3D image
- threshold:
int/float: discard components smaller than this in voxels
tuple/list: discard components outside range [lower, upper)
- connectivity: Connectivity pattern to use for CCL
- in_place: Whether to modify the input image or create a copy
- binary_image: Treat input as binary image
- precomputed_ccl: Input is already a CCL result, skip CCL computation
- invert: Switch threshold direction (< becomes >=, between becomes outside)
- return_N: Also return the number of remaining components
Returns:
- NDArray: Cleaned image with dust removed
- tuple[NDArray, int]: If return_N=True, includes remaining component count
"""
Usage examples:
import cc3d
import numpy as np
# Create noisy image with small artifacts
labels_in = np.random.randint(0, 100, (100, 100, 100))
labels_in = cc3d.connected_components(labels_in) # Get connected components first
# Basic dust removal - remove components smaller than 50 voxels
cleaned = cc3d.dust(labels_in, threshold=50)
# Remove dust and get count of remaining components
cleaned, remaining_count = cc3d.dust(
labels_in, threshold=50, return_N=True
)
print(f"Kept {remaining_count} components after dust removal")
# Range-based filtering - keep only components of 100-999 voxels
# (a range threshold discards components outside [lower, upper))
medium_components = cc3d.dust(
labels_in, threshold=[100, 1000]
)
# Inverted range - remove components inside the range (keep very small and very large)
filtered = cc3d.dust(
labels_in, threshold=[50, 500], invert=True # Remove components 50-499 voxels
)
# In-place modification (memory efficient)
original_image = labels_in.copy()
cc3d.dust(original_image, threshold=100, in_place=True)
# Work with binary images
binary_input = (labels_in > 0).astype(np.uint8)
cleaned_binary = cc3d.dust(
binary_input, threshold=200, binary_image=True
)
# Skip CCL computation if input is already connected components
cc_labels = cc3d.connected_components(raw_input)
cleaned = cc3d.dust(
cc_labels, threshold=75, precomputed_ccl=True
)
# Remove large components instead of small ones
no_large_components = cc3d.dust(
labels_in, threshold=1000, invert=True
)
Extract the k largest connected components from an image with efficient processing and optional relabeling, useful for focusing analysis on primary structures.
def largest_k(
img: NDArray[typing.Any],
k: int,
connectivity: Literal[4,6,8,18,26] = 26,
delta: Union[int,float] = 0,
return_N: bool = False,
binary_image: bool = False,
precomputed_ccl: bool = False,
) -> Union[NDArray[Union[np.bool_,np.uint16,np.uint32,np.uint64]], tuple[NDArray[Union[np.bool_,np.uint16,np.uint32,np.uint64]], int]]:
"""
Returns the k largest connected components in the image.
Parameters:
- img: Input image
- k: Number of largest components to keep (>= 0)
- connectivity: Connectivity pattern for CCL
- delta: For continuous images, allowed difference in adjacent voxel values
- return_N: Return tuple with component count
- binary_image: Treat input as binary image
- precomputed_ccl: Input is already a CCL result
Returns:
- NDArray: Image containing only the k largest components, relabeled 1 to k
- tuple[NDArray, int]: If return_N=True, includes actual number of components
"""
Usage examples:
import cc3d
import numpy as np
# Create test image with many components
labels_in = np.random.randint(0, 50, (200, 200, 200))
# Get the 5 largest components
largest_5 = cc3d.largest_k(labels_in, k=5)
# Get largest components with count
largest_10, actual_count = cc3d.largest_k(
labels_in, k=10, return_N=True
)
print(f"Requested 10, got {actual_count} components")
# Single largest component
largest_1 = cc3d.largest_k(labels_in, k=1)
# No components (returns zeros)
nothing = cc3d.largest_k(labels_in, k=0)
# All components if k is larger than available
all_components = cc3d.largest_k(labels_in, k=1000)
# Use with continuous value CCL
grayscale = np.random.random((100, 100, 100)) * 255
largest_continuous = cc3d.largest_k(
grayscale, k=3, delta=15, connectivity=6
)
# Binary image processing
binary = (labels_in > 25).astype(np.uint8)
largest_binary = cc3d.largest_k(
binary, k=2, binary_image=True
)
# Skip CCL if already computed
cc_labels = cc3d.connected_components(labels_in)
largest_from_ccl = cc3d.largest_k(
cc_labels, k=3, precomputed_ccl=True
)
# Extract largest components and get original labels
original_labels = labels_in.copy()
largest = cc3d.largest_k(labels_in, k=5)
# Combine with original to preserve original label values
original_labels *= (largest > 0)
# Performance optimization with precomputed CCL and statistics
cc_labels = cc3d.connected_components(large_image)
stats = cc3d.statistics(cc_labels)
component_sizes = stats['voxel_counts'][1:] # Skip background
# Find which original labels correspond to largest components
sorted_indices = np.argsort(component_sizes)[-5:] # 5 largest
print(f"Original labels of largest components: {sorted_indices + 1}")
# Extract using precomputed CCL
largest_5_optimized = cc3d.largest_k(
cc_labels, k=5, precomputed_ccl=True
)
# Multi-stage filtering: remove dust, then keep largest
cleaned = cc3d.dust(input_image, threshold=100)
final = cc3d.largest_k(cleaned, k=10, precomputed_ccl=True)
# Size range filtering with largest extraction
# A range threshold keeps components inside [lower, upper); invert=True would remove them instead
medium_sized = cc3d.dust(input_image, threshold=[200, 2000])
top_medium = cc3d.largest_k(medium_sized, k=5, precomputed_ccl=True)
# Process large images in-place when possible
large_image = load_large_image()
# In-place dust removal
cc3d.dust(large_image, threshold=500, in_place=True)
# Then extract largest (creates new array, but input is already cleaned)
result = cc3d.largest_k(large_image, k=3, precomputed_ccl=True)
# For repeated filtering operations, precompute CCL once
cc_labels = cc3d.connected_components(input_image)
# Apply multiple filters efficiently
cleaned = cc3d.dust(cc_labels, threshold=100, precomputed_ccl=True)
largest = cc3d.largest_k(cc_labels, k=5, precomputed_ccl=True)
# Keep components of 50-499 voxels (a range threshold discards everything outside [lower, upper))
medium = cc3d.dust(cc_labels, threshold=[50, 500],
precomputed_ccl=True)
Install with Tessl CLI
npx tessl i tessl/pypi-connected-components-3d