CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-sahi

A vision library for performing sliced inference on large images/small objects

Pending
Overview
Eval results
Files

docs/utilities.md

Utilities

SAHI provides comprehensive utility functions for computer vision operations, framework integrations, file I/O, and compatibility across different deep learning ecosystems. These utilities support the core functionality and provide additional convenience functions.

Capabilities

Computer Vision Utilities

Core computer vision operations including image reading, visualization, and format conversions.

def read_image_as_pil(image_path: str) -> Image.Image:
    """
    Read image as PIL Image object.
    
    Parameters:
    - image_path (str): Path to image file
    
    Returns:
    Image.Image: PIL Image object
    """

def visualize_object_predictions(
    image: np.ndarray,
    object_prediction_list: List[ObjectPrediction],
    rect_th: int = 3,
    text_size: float = 3,
    text_th: float = 3,
    color: tuple = None,
    hide_labels: bool = False,
    hide_conf: bool = False,
    output_dir: Optional[str] = None,
    file_name: Optional[str] = "prediction_visual",
) -> np.ndarray:
    """
    Visualize object predictions on image with bounding boxes and labels.
    
    Parameters:
    - image (np.ndarray): Input image array
    - object_prediction_list: List of ObjectPrediction instances
    - rect_th (int): Rectangle thickness for bounding boxes
    - text_size (float): Text size for labels
    - text_th (float): Text thickness
    - color (tuple, optional): Custom color for all boxes (BGR format)
    - hide_labels (bool): Hide class labels
    - hide_conf (bool): Hide confidence scores  
    - output_dir (str, optional): Directory to save visualization
    - file_name (str): Name for saved visualization file
    
    Returns:
    np.ndarray: Visualized image with annotations
    """

def crop_object_predictions(
    image: np.ndarray,
    object_prediction_list: List[ObjectPrediction],
    output_dir: str,
    file_name: str = "prediction_visual",
    export_format: str = "png",
) -> Dict:
    """
    Crop detected objects from image and save individually.
    
    Parameters:
    - image (np.ndarray): Source image array
    - object_prediction_list: List of ObjectPrediction instances
    - output_dir (str): Directory for saving cropped images
    - file_name (str): Base name for cropped files
    - export_format (str): Image format of the saved crops ("png" or "jpg")
    
    Returns:
    Dict: Dictionary with crop information and file paths
    """

Image Format and Conversion Utilities

def get_coco_segmentation_from_bool_mask(bool_mask: np.ndarray) -> List[List[float]]:
    """
    Convert boolean mask to COCO polygon segmentation format.
    
    Parameters:
    - bool_mask (np.ndarray): Boolean mask array
    
    Returns:
    List[List[float]]: COCO format polygon coordinates
    """

def get_bool_mask_from_coco_segmentation(
    segmentation: List, 
    height: int, 
    width: int
) -> np.ndarray:
    """
    Convert COCO segmentation to boolean mask.
    
    Parameters:
    - segmentation (List): COCO format polygon segmentation
    - height (int): Mask height
    - width (int): Mask width
    
    Returns:
    np.ndarray: Boolean mask array
    """

def get_bbox_from_coco_segmentation(segmentation: List) -> List[int]:
    """
    Extract bounding box from COCO segmentation.
    
    Parameters:
    - segmentation (List): COCO format polygon segmentation
    
    Returns:
    List[int]: Bounding box [xmin, ymin, width, height]
    """

Color Management

class Colors:
    """
    Color palette for consistent visualization across different plots and frameworks.
    Provides color management for bounding boxes, labels, and visualization elements.
    """
    
    def __init__(self):
        """Initialize color palette with predefined colors."""
        
    def __call__(self, i: int, bgr: bool = False) -> Tuple[int, ...]:
        """
        Get color for given index.
        
        Parameters:
        - i (int): Color index
        - bgr (bool): Return BGR format instead of RGB
        
        Returns:
        Tuple[int, ...]: Color tuple (RGB or BGR)
        """

File I/O Utilities

Comprehensive file operations supporting multiple formats and efficient data handling.

def save_json(data: Dict, save_path: str):
    """
    Save data as JSON file with proper formatting.
    
    Parameters:
    - data (Dict): Data to save
    - save_path (str): Output file path
    """

def load_json(load_path: str) -> Dict:
    """
    Load JSON file as dictionary.
    
    Parameters:
    - load_path (str): Path to JSON file
    
    Returns:
    Dict: Loaded data
    """

def save_pickle(data: Any, save_path: str):
    """
    Save data as pickle file for efficient storage.
    
    Parameters:
    - data (Any): Data to save
    - save_path (str): Output file path
    """

def load_pickle(load_path: str) -> Any:
    """
    Load pickle file.
    
    Parameters:
    - load_path (str): Path to pickle file
    
    Returns:
    Any: Loaded data
    """

def list_files(
    directory: str,
    contains: Optional[List[str]] = None,
    extensions: Optional[List[str]] = None,
    recursive: bool = True,
) -> List[str]:
    """
    List files in directory with filtering options.
    
    Parameters:
    - directory (str): Directory to search
    - contains (List[str], optional): Substrings that filenames must contain
    - extensions (List[str], optional): File extensions to include
    - recursive (bool): Search subdirectories recursively
    
    Returns:
    List[str]: List of matching file paths
    """

def download_from_url(url: str, save_path: str):
    """
    Download file from URL.
    
    Parameters:
    - url (str): URL to download from
    - save_path (str): Local path to save file
    """

def import_model_class(model_class_name: str, model_type: str):
    """
    Dynamically import model class based on type.
    
    Parameters:
    - model_class_name (str): Name of model class to import
    - model_type (str): Model framework type
    
    Returns:
    Type: Imported model class
    """

PyTorch Utilities

Utilities for PyTorch tensor operations and device management.

def empty_cuda_cache():
    """Clear CUDA memory cache to free up GPU memory."""

def to_float_tensor(image: Union[np.ndarray, Image.Image]) -> torch.Tensor:
    """
    Convert image to PyTorch float tensor.
    
    Parameters:
    - image: Input image (numpy array or PIL Image)
    
    Returns:
    torch.Tensor: Float tensor in CHW format
    """

def torch_to_numpy(tensor: torch.Tensor) -> np.ndarray:
    """
    Convert PyTorch tensor to numpy array.
    
    Parameters:
    - tensor (torch.Tensor): Input tensor
    
    Returns:
    np.ndarray: Numpy array
    """

def select_device(device: Optional[str] = None) -> torch.device:
    """
    Select appropriate PyTorch device for inference.
    
    Parameters:
    - device (str, optional): Device specification ("cpu", "cuda", "mps", etc.)
    
    Returns:
    torch.device: Selected PyTorch device
    """

Import and Environment Utilities

Utilities for checking dependencies and managing package imports.

def is_available(package: str) -> bool:
    """
    Check if package is available for import.
    
    Parameters:
    - package (str): Package name to check
    
    Returns:
    bool: True if package is available
    """

def check_requirements(
    requirements: List[str], 
    raise_exception: bool = True
):
    """
    Verify that required packages are installed.
    
    Parameters:
    - requirements (List[str]): List of required package names
    - raise_exception (bool): Whether to raise exception if packages missing
    
    Raises:
    ImportError: If required packages are missing and raise_exception=True
    """

def get_package_info(package_name: str) -> Dict[str, str]:
    """
    Get information about installed package.
    
    Parameters:
    - package_name (str): Name of package to query
    
    Returns:
    Dict[str, str]: Package information (version, location, etc.)
    """

def print_environment_info():
    """
    Print comprehensive environment and dependency information.
    Includes Python version, PyTorch version, CUDA availability, 
    system information, and installed package versions.
    """

Framework-Specific Utilities

Utilities for specific deep learning framework integrations.

# Detectron2 utilities
def convert_detectron2_bbox_format(bbox: List) -> List:
    """Convert Detectron2 bbox format to standard format."""

def convert_detectron2_mask_format(mask: np.ndarray) -> np.ndarray:  
    """Convert Detectron2 mask format to standard format."""

# MMDetection utilities  
def convert_mmdet_bbox_format(bbox: List) -> List:
    """Convert MMDetection bbox format to standard format."""

def convert_mmdet_mask_format(mask: np.ndarray) -> np.ndarray:
    """Convert MMDetection mask format to standard format."""

# TorchVision utilities
def convert_torchvision_bbox_format(bbox: torch.Tensor) -> List:
    """Convert TorchVision bbox format to standard format."""

# RT-DETR utilities
def convert_rtdetr_output_format(outputs: Dict) -> List:
    """Convert RT-DETR output format to standard ObjectPrediction format."""

File Path and Video Utilities

class Path:
    """Enhanced path handling with additional convenience methods."""
    
    def __init__(self, path: str):
        """Initialize path handler."""
    
    @property
    def suffix(self) -> str:
        """Get file extension."""
    
    @property
    def stem(self) -> str:
        """Get filename without extension."""

def increment_path(path: str, exist_ok: bool = False) -> str:
    """
    Increment file path to avoid overwrites.
    
    Parameters:
    - path (str): Original path
    - exist_ok (bool): Whether existing path is acceptable
    
    Returns:
    str: Incremented path (e.g., "file_1.txt", "file_2.txt")
    """

def get_video_reader(video_path: str):
    """
    Get video reader object for frame-by-frame processing.
    
    Parameters:
    - video_path (str): Path to video file
    
    Returns:
    Video reader object
    """

Constants

# Supported file extensions
IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.tiff', '.bmp', '.gif']
VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.wmv']
IMAGE_EXTENSIONS_LOSSLESS = ['.png', '.tiff', '.bmp']
IMAGE_EXTENSIONS_LOSSY = ['.jpg', '.jpeg']

Usage Examples

Image Processing and Visualization

import numpy as np

from sahi import get_sliced_prediction
from sahi.utils.cv import read_image_as_pil, visualize_object_predictions

# Read image
image = read_image_as_pil("input_image.jpg")

# Get predictions (assumes `model` is an already-loaded SAHI detection model)
result = get_sliced_prediction(
    image="input_image.jpg",
    detection_model=model
)

# Visualize predictions
visualized = visualize_object_predictions(
    image=np.array(image),
    object_prediction_list=result.object_prediction_list,
    rect_th=3,
    text_size=1.0,
    hide_conf=False,
    output_dir="visualizations/",
    file_name="result"
)

File Operations

from sahi.utils.file import save_json, load_json, list_files

# Save prediction results
predictions_data = {
    "predictions": [pred.to_coco_prediction().json for pred in result.object_prediction_list],
    "metadata": {"model": "yolov8n", "confidence": 0.25}
}
save_json(predictions_data, "predictions.json")

# Load data
loaded_data = load_json("predictions.json")

# List image files
image_files = list_files(
    directory="dataset/",
    extensions=[".jpg", ".png"],
    contains=["train", "val"],
    recursive=True
)
print(f"Found {len(image_files)} image files")

Environment and Dependency Management

from sahi.utils.import_utils import is_available, check_requirements, print_environment_info

# Check if optional dependencies are available
if is_available("fiftyone"):
    print("FiftyOne integration available")

if is_available("mmdet"):
    print("MMDetection integration available")

# Verify required packages
try:
    check_requirements(["torch", "torchvision", "ultralytics"])
    print("All requirements satisfied")
except ImportError as e:
    print(f"Missing requirements: {e}")

# Print full environment info
print_environment_info()

PyTorch Utilities

from sahi.utils.torch_utils import select_device, empty_cuda_cache, to_float_tensor
import numpy as np

# Request the CUDA device explicitly; omit the argument to let SAHI choose a device
device = select_device("cuda")
print(f"Using device: {device}")

# Convert image to tensor
image_array = np.random.randint(0, 256, size=(480, 640, 3), dtype=np.uint8)
tensor = to_float_tensor(image_array)
print(f"Tensor shape: {tensor.shape}")

# Clear CUDA cache after processing
empty_cuda_cache()

Custom Colors for Visualization

from sahi.utils.cv import Colors, visualize_object_predictions

# Initialize color palette
colors = Colors()

# Get palette colors by index
first_color = colors(0)                # First color in palette (RGB)
second_color = colors(1)               # Second color in palette (RGB)
third_color_bgr = colors(2, bgr=True)  # Third color, returned in BGR order

# Use custom color for visualization
visualized = visualize_object_predictions(
    image=image_array,
    object_prediction_list=predictions,
    color=(0, 255, 0),  # Custom green color
    rect_th=2,
    text_size=0.8
)

File Path Management

from sahi.utils.file import increment_path, Path

# Avoid overwriting existing files
output_path = increment_path("results/experiment.json")
print(f"Using path: {output_path}")  # e.g., "results/experiment_1.json"

# Enhanced path handling
path = Path("dataset/images/sample.jpg")
print(f"Extension: {path.suffix}")  # ".jpg"
print(f"Filename: {path.stem}")     # "sample"

Format Conversions

import numpy as np

from sahi.utils.cv import (
    get_coco_segmentation_from_bool_mask,
    get_bool_mask_from_coco_segmentation,
    get_bbox_from_coco_segmentation
)

# Create a boolean mask containing a single rectangular object
bool_mask = np.zeros((100, 100), dtype=bool)
bool_mask[20:60, 30:80] = True

# Convert to COCO format
coco_segmentation = get_coco_segmentation_from_bool_mask(bool_mask)

# Convert back to boolean mask
reconstructed_mask = get_bool_mask_from_coco_segmentation(
    coco_segmentation, 100, 100
)

# Extract bounding box from segmentation
bbox = get_bbox_from_coco_segmentation(coco_segmentation)
print(f"Bounding box: {bbox}")

Install with Tessl CLI

npx tessl i tessl/pypi-sahi

docs

annotation-framework.md

cli.md

coco-integration.md

image-slicing.md

index.md

model-integration.md

postprocessing.md

prediction-functions.md

utilities.md

tile.json