SAHI — a vision library for performing sliced inference on large images and small objects.
SAHI provides a unified interface for loading and using detection models from various deep learning frameworks. The AutoDetectionModel factory class automatically handles framework-specific implementations while providing consistent APIs.
The main entry point for loading detection models from different frameworks. Automatically selects the appropriate model wrapper based on the model_type parameter.
class AutoDetectionModel:
    """Factory for loading a framework-specific DetectionModel wrapper.

    Selects the appropriate wrapper class based on ``model_type`` and
    returns it with a consistent API regardless of the backing framework.
    """

    @staticmethod
    def from_pretrained(
        model_type: str,
        model_path: Optional[str] = None,
        model: Optional[Any] = None,
        config_path: Optional[str] = None,
        device: Optional[str] = None,
        mask_threshold: float = 0.5,
        confidence_threshold: float = 0.3,
        category_mapping: Optional[Dict] = None,
        category_remapping: Optional[Dict] = None,
        load_at_init: bool = True,
        image_size: Optional[int] = None,
        **kwargs,
    ) -> DetectionModel:
        """
        Load a DetectionModel from given path and model type.

        Parameters:
        - model_type (str): Framework name ("ultralytics", "mmdet", "detectron2", "huggingface", "torchvision", "yolov5", "roboflow", "rtdetr")
        - model_path (str, optional): Path to model weights file
        - model (Any, optional): Pre-initialized model instance
        - config_path (str, optional): Path to model config file (for MMDetection)
        - device (str, optional): Device specification ("cpu", "cuda", "cuda:0", etc.)
        - mask_threshold (float): Threshold for mask predictions (0-1)
        - confidence_threshold (float): Minimum confidence for detections (0-1)
        - category_mapping (Dict, optional): Map category IDs to names
        - category_remapping (Dict, optional): Remap category names to new IDs
        - load_at_init (bool): Whether to load model weights at initialization
        - image_size (int, optional): Input image size for inference

        Returns:
        DetectionModel: Framework-specific model wrapper
        """

# SAHI supports the following detection frameworks:
# Maps each supported framework identifier to the wrapper class that implements it.
MODEL_TYPE_TO_MODEL_CLASS_NAME = {
    "ultralytics": "UltralyticsDetectionModel",
    "rtdetr": "RTDetrDetectionModel",
    "mmdet": "MmdetDetectionModel",
    "yolov5": "Yolov5DetectionModel",
    "detectron2": "Detectron2DetectionModel",
    "huggingface": "HuggingfaceDetectionModel",
    "torchvision": "TorchVisionDetectionModel",
    "roboflow": "RoboflowDetectionModel",
}

# Model-type aliases that all resolve to the Ultralytics wrapper.
ULTRALYTICS_MODEL_NAMES = ["yolov8", "yolov11", "yolo11", "ultralytics"]

# All model integrations inherit from the base DetectionModel class,
# providing consistent APIs across frameworks.
class DetectionModel:
    """Base class defining the shared API implemented by all framework wrappers."""

    def __init__(
        self,
        model_path: Optional[str] = None,
        model: Optional[Any] = None,
        config_path: Optional[str] = None,
        device: Optional[str] = None,
        mask_threshold: float = 0.5,
        confidence_threshold: float = 0.3,
        category_mapping: Optional[Dict] = None,
        category_remapping: Optional[Dict] = None,
        load_at_init: bool = True,
        image_size: Optional[int] = None,
    ): ...

    def load_model(self): ...

    def set_model(self, model: Any): ...

    def set_device(self, device: str): ...

    def perform_inference(self, image: np.ndarray) -> List: ...

    # NOTE(review): the mutable default `[0, 0]` mirrors the upstream SAHI
    # signature and is kept for interface fidelity — confirm against upstream.
    def convert_original_predictions(
        self,
        shift_amount: Optional[List[int]] = [0, 0],
        full_shape: Optional[List[int]] = None,
    ) -> ObjectPrediction: ...


class UltralyticsDetectionModel(DetectionModel):
    """
    Ultralytics YOLO model wrapper for YOLOv8, YOLOv11, and other Ultralytics models.
    Supports both detection and segmentation models.
    """


class MmdetDetectionModel(DetectionModel):
    """
    MMDetection framework integration supporting a wide range of detection
    and segmentation models including Faster R-CNN, Mask R-CNN, RetinaNet, etc.
    """


class Detectron2DetectionModel(DetectionModel):
    """
    Facebook Detectron2 framework integration for state-of-the-art
    object detection and instance segmentation models.
    """


class HuggingfaceDetectionModel(DetectionModel):
    """
    HuggingFace Transformers integration for transformer-based detection models
    like DETR, RT-DETR, and other vision transformer architectures.
    """


class TorchVisionDetectionModel(DetectionModel):
    """
    PyTorch TorchVision integration for official PyTorch detection models
    including Faster R-CNN, Mask R-CNN, RetinaNet, and SSD.
    """


class Yolov5DetectionModel(DetectionModel):
    """
    YOLOv5 model integration for Ultralytics YOLOv5 models with
    custom loading and inference pipeline.
    """


class RoboflowDetectionModel(DetectionModel):
    """
    Roboflow platform integration for deploying and using models
    trained on the Roboflow platform.
    """


class RTDetrDetectionModel(DetectionModel):
    """
    RT-DETR (Real-Time Detection Transformer) model integration
    for fast transformer-based object detection.
    """


from sahi import AutoDetectionModel
# Ultralytics YOLO model
yolo_model = AutoDetectionModel.from_pretrained(
    model_type='ultralytics',
    model_path='yolov8n.pt',
    confidence_threshold=0.25,
    device='cuda:0'
)

# MMDetection model (requires both checkpoint and config file)
mmdet_model = AutoDetectionModel.from_pretrained(
    model_type='mmdet',
    model_path='checkpoint.pth',
    config_path='configs/faster_rcnn_r50_fpn_1x_coco.py',
    confidence_threshold=0.3,
    device='cuda:0'
)

# HuggingFace model (model_path is a Hub model id)
hf_model = AutoDetectionModel.from_pretrained(
    model_type='huggingface',
    model_path='facebook/detr-resnet-50',
    confidence_threshold=0.5,
    device='cpu'
)

# Detectron2 model (model_path may be a detectron2:// model-zoo URI)
d2_model = AutoDetectionModel.from_pretrained(
    model_type='detectron2',
    model_path='detectron2://COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl',
    confidence_threshold=0.5,
    device='cuda:0'
)

# Custom category mapping
# Map raw category IDs produced by the model to human-readable names.
category_mapping = {
    0: "person",
    1: "bicycle",
    2: "car",
    3: "motorcycle"
}

# Category remapping for custom datasets: rename/renumber categories
# after prediction (name -> new ID).
category_remapping = {
    "person": 1,
    "vehicle": 2
}

model = AutoDetectionModel.from_pretrained(
    model_type='ultralytics',
    model_path='custom_model.pt',
    confidence_threshold=0.25,
    mask_threshold=0.5,
    category_mapping=category_mapping,
    category_remapping=category_remapping,
    image_size=640,
    device='cuda:0'
)

import torch
from ultralytics import YOLO

# Load model externally
external_model = YOLO('yolov8n.pt')

# Pass the pre-initialized model instance to SAHI instead of a weights path.
sahi_model = AutoDetectionModel.from_pretrained(
    model_type='ultralytics',
    model=external_model,
    confidence_threshold=0.25
)

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-sahi