A vision library for performing sliced inference on large images/small objects
---
SAHI's core prediction capabilities provide standard inference, sliced inference for large images, batch processing, and comprehensive parameter control. These functions form the foundation of SAHI's slicing-aided inference approach.
Performs object detection on a single image without slicing. Suitable for regular-sized images or when slicing is not needed.
def get_prediction(
    image,
    detection_model,
    shift_amount: list = [0, 0],
    full_shape=None,
    postprocess: Optional[PostprocessPredictions] = None,
    verbose: int = 0,
    exclude_classes_by_name: Optional[List[str]] = None,
    exclude_classes_by_id: Optional[List[int]] = None,
) -> PredictionResult:
    """
    Perform detection prediction on a single image.

    Performs object detection on a single image without slicing. Suitable for
    regular-sized images or when slicing is not needed.

    Parameters:
    - image: Image path (str) or numpy array
    - detection_model: Loaded DetectionModel instance
    - shift_amount (list): Coordinate shift [shift_x, shift_y] for prediction mapping
    - full_shape: Original image shape [height, width] if using crops
    - postprocess: PostprocessPredictions instance for combining predictions
    - verbose (int): Verbosity level (0=silent, 1=print duration)
    - exclude_classes_by_name: List of class names to exclude from results
    - exclude_classes_by_id: List of class IDs to exclude from results

    Returns:
        PredictionResult: Container with predictions, image, and timing info
    """


# The core SAHI functionality that slices large images into overlapping
# patches, performs inference on each patch, and intelligently combines
# results.
def get_sliced_prediction(
    image,
    detection_model,
    slice_height: Optional[int] = None,
    slice_width: Optional[int] = None,
    overlap_height_ratio: float = 0.2,
    overlap_width_ratio: float = 0.2,
    perform_standard_pred: bool = True,
    postprocess_type: str = "GREEDYNMM",
    postprocess_match_metric: str = "IOS",
    postprocess_match_threshold: float = 0.5,
    postprocess_class_agnostic: bool = False,
    verbose: int = 1,
    merge_buffer_length: Optional[int] = None,
    auto_slice_resolution: bool = True,
    slice_export_prefix: Optional[str] = None,
    slice_dir: Optional[str] = None,
    exclude_classes_by_name: Optional[List[str]] = None,
    exclude_classes_by_id: Optional[List[int]] = None,
) -> PredictionResult:
    """
    Perform sliced inference on large images for better small object detection.

    Parameters:
    - image: Image path (str) or numpy array
    - detection_model: Loaded DetectionModel instance
    - slice_height (int, optional): Height of each slice in pixels
    - slice_width (int, optional): Width of each slice in pixels
    - overlap_height_ratio (float): Vertical overlap ratio between slices (0-1)
    - overlap_width_ratio (float): Horizontal overlap ratio between slices (0-1)
    - perform_standard_pred (bool): Perform standard prediction on full image in addition to sliced prediction
    - postprocess_type (str): Postprocessing method ("GREEDYNMM", "NMM", "NMS", "LSNMS")
    - postprocess_match_metric (str): Overlap metric for combining predictions ("IOU", "IOS")
    - postprocess_match_threshold (float): Overlap threshold for merging (0-1)
    - postprocess_class_agnostic (bool): Whether to ignore class when merging
    - verbose (int): Verbosity level (0=silent, 1=progress, 2=detailed)
    - merge_buffer_length (int, optional): Buffer length for low memory sliced prediction
    - auto_slice_resolution (bool): Auto-calculate slice dimensions from image size
    - slice_export_prefix (str, optional): Prefix for exported slice files
    - slice_dir (str, optional): Directory to save slice images
    - exclude_classes_by_name: List of class names to exclude
    - exclude_classes_by_id: List of class IDs to exclude

    Returns:
        PredictionResult: Combined predictions from all slices
    """


# High-level prediction function with extensive configuration options for
# batch processing, video processing, and output management.
def predict(
    model_type: str = "yolov8",
    model_path: Optional[str] = None,
    # NOTE(review): annotations corrected to Optional[...] below — the
    # previous `str = None` / `int = None` hints contradicted their defaults.
    model_device: Optional[str] = None,
    model_confidence_threshold: float = 0.25,
    source: Optional[str] = None,
    slice_height: Optional[int] = None,
    slice_width: Optional[int] = None,
    overlap_height_ratio: float = 0.2,
    overlap_width_ratio: float = 0.2,
    postprocess_type: str = "GREEDYNMM",
    postprocess_match_metric: str = "IOS",
    postprocess_match_threshold: float = 0.5,
    postprocess_class_agnostic: bool = False,
    export_pickle: bool = False,
    export_crop: bool = False,
    export_visual: bool = True,
    project: str = "runs/predict",
    name: str = "exp",
    return_dict: bool = False,
    force_postprocess: bool = False,
    frame_skip_interval: int = 0,
    export_format: str = "coco",
    verbose: int = 1,
    crop_class_agnostic: bool = True,
    desired_name2id: Optional[Dict[str, int]] = None,
    auto_slice_resolution: bool = True,
) -> Optional[Dict]:
    """
    Comprehensive prediction pipeline with model loading, inference, and export.

    Parameters:
    - model_type (str): Detection framework ("ultralytics", "mmdet", etc.)
    - model_path (str): Path to model weights
    - model_device (str): Device for inference ("cpu", "cuda", etc.)
    - model_confidence_threshold (float): Minimum confidence for detections
    - source (str): Input path (image, directory, or video file)
    - slice_height (int): Slice height in pixels (None for auto)
    - slice_width (int): Slice width in pixels (None for auto)
    - overlap_height_ratio (float): Vertical overlap between slices
    - overlap_width_ratio (float): Horizontal overlap between slices
    - postprocess_type (str): Postprocessing algorithm
    - postprocess_match_metric (str): Overlap calculation method
    - postprocess_match_threshold (float): Threshold for combining predictions
    - postprocess_class_agnostic (bool): Class-agnostic postprocessing
    - export_pickle (bool): Save predictions as pickle files
    - export_crop (bool): Export cropped detected objects
    - export_visual (bool): Export visualization images
    - project (str): Base directory for outputs
    - name (str): Experiment name for output subdirectory
    - return_dict (bool): Return results as dictionary
    - force_postprocess (bool): Force postprocessing even for single predictions
    - frame_skip_interval (int): Skip frames in video processing
    - export_format (str): Output format ("coco", "yolo", "fiftyone")
    - verbose (int): Verbosity level
    - crop_class_agnostic (bool): Class-agnostic cropping
    - desired_name2id (Dict): Custom category name to ID mapping
    - auto_slice_resolution (bool): Auto-calculate slice parameters

    Returns:
        Dict or None: Prediction results if return_dict=True
    """


# Specialized prediction function for FiftyOne datasets with seamless
# integration and result management.
def predict_fiftyone(
    model_type: str = "mmdet",
    model_path: Optional[str] = None,
    model_config_path: Optional[str] = None,
    model_confidence_threshold: float = 0.25,
    model_device: Optional[str] = None,
    model_category_mapping: Optional[dict] = None,
    model_category_remapping: Optional[dict] = None,
    dataset_json_path: str = "",
    image_dir: str = "",
    no_standard_prediction: bool = False,
    no_sliced_prediction: bool = False,
    image_size: Optional[int] = None,
    slice_height: int = 256,
    slice_width: int = 256,
    overlap_height_ratio: float = 0.2,
    overlap_width_ratio: float = 0.2,
    postprocess_type: str = "GREEDYNMM",
    postprocess_match_metric: str = "IOS",
    postprocess_match_threshold: float = 0.5,
    postprocess_class_agnostic: bool = False,
    verbose: int = 1,
    exclude_classes_by_name: Optional[List[str]] = None,
    exclude_classes_by_id: Optional[List[int]] = None,
):
    """
    Perform predictions on FiftyOne datasets with automatic result integration.

    Parameters:
    - model_type (str): Detection framework type ("mmdet", "yolov5", etc.)
    - model_path (str, optional): Path to model weights
    - model_config_path (str, optional): Path to model config file (for MMDetection)
    - model_confidence_threshold (float): Detection confidence threshold
    - model_device (str, optional): Inference device ("cpu", "cuda", etc.)
    - model_category_mapping (dict, optional): Category ID to name mapping
    - model_category_remapping (dict, optional): Category remapping after inference
    - dataset_json_path (str): Path to COCO format dataset JSON
    - image_dir (str): Directory containing dataset images
    - no_standard_prediction (bool): Skip standard (full image) prediction
    - no_sliced_prediction (bool): Skip sliced prediction
    - image_size (int, optional): Input image size for inference
    - slice_height (int): Slice height for large images
    - slice_width (int): Slice width for large images
    - overlap_height_ratio (float): Vertical slice overlap
    - overlap_width_ratio (float): Horizontal slice overlap
    - postprocess_type (str): Postprocessing method
    - postprocess_match_metric (str): Overlap metric for combining
    - postprocess_match_threshold (float): Overlap threshold
    - postprocess_class_agnostic (bool): Class-agnostic postprocessing
    - verbose (int): Verbosity level
    - exclude_classes_by_name (List[str], optional): Class names to exclude
    - exclude_classes_by_id (List[int], optional): Class IDs to exclude

    Returns:
        FiftyOne dataset with predictions integrated
    """


def filter_predictions(
    object_prediction_list: List[ObjectPrediction],
    exclude_classes_by_name: Optional[List[str]] = None,
    exclude_classes_by_id: Optional[List[int]] = None,
) -> List[ObjectPrediction]:
    """
    Filter predictions by excluding specified classes.

    Parameters:
    - object_prediction_list: List of ObjectPrediction instances
    - exclude_classes_by_name: Class names to exclude
    - exclude_classes_by_id: Class IDs to exclude

    Returns:
        List of filtered ObjectPrediction instances
    """


# Usage example: basic sliced inference with an Ultralytics model.
from sahi import AutoDetectionModel, get_sliced_prediction
# Load model
model = AutoDetectionModel.from_pretrained(
    model_type='ultralytics',
    model_path='yolov8n.pt',
    confidence_threshold=0.3
)

# Perform sliced inference
result = get_sliced_prediction(
    image="large_image.jpg",
    detection_model=model,
    slice_height=640,
    slice_width=640,
    overlap_height_ratio=0.2,
    overlap_width_ratio=0.2
)
print(f"Found {len(result.object_prediction_list)} objects")

# Usage example: custom postprocessing configuration.
from sahi.predict import get_sliced_prediction
from sahi.postprocess.combine import GreedyNMMPostprocess

# Custom postprocessing
postprocess = GreedyNMMPostprocess(
    match_threshold=0.5,
    match_metric="IOU",
    class_agnostic=False
)
result = get_sliced_prediction(
    image="image.jpg",
    detection_model=model,
    slice_height=512,
    slice_width=512,
    postprocess=postprocess,
    verbose=2
)

# Usage example: batch prediction over a directory with exports.
from sahi.predict import predict
# Process entire directory with exports
predict(
    model_type="ultralytics",
    model_path="yolov8n.pt",
    source="images/",
    slice_height=640,
    slice_width=640,
    export_visual=True,
    export_crop=True,
    export_format="coco",
    project="results",
    name="experiment_1"
)

# Process video with frame skipping
predict(
    model_type="ultralytics",
    model_path="yolov8n.pt",
    source="video.mp4",
    frame_skip_interval=5,  # Process every 5th frame
    slice_height=640,
    slice_width=640,
    export_visual=True
)

# Exclude specific classes
result = get_sliced_prediction(
    image="image.jpg",
    detection_model=model,
    exclude_classes_by_name=["person", "bicycle"],
    exclude_classes_by_id=[2, 3, 5]
)

# Install with Tessl CLI:
npx tessl i tessl/pypi-sahi