SAHI — a vision library for performing sliced inference on large images and small objects.
SAHI provides a unified interface for loading and using detection models from various deep learning frameworks. The AutoDetectionModel factory class automatically handles framework-specific implementations while providing consistent APIs.
The main entry point for loading detection models from different frameworks. Automatically selects the appropriate model wrapper based on the model_type parameter.
class AutoDetectionModel:
    """Factory for loading a framework-specific DetectionModel wrapper.

    Selects the appropriate wrapper class based on ``model_type`` and
    returns it with a consistent API regardless of the backing framework.
    """

    @staticmethod
    def from_pretrained(
        model_type: str,
        model_path: Optional[str] = None,
        model: Optional[Any] = None,
        config_path: Optional[str] = None,
        device: Optional[str] = None,
        mask_threshold: float = 0.5,
        confidence_threshold: float = 0.3,
        category_mapping: Optional[Dict] = None,
        category_remapping: Optional[Dict] = None,
        load_at_init: bool = True,
        image_size: Optional[int] = None,
        **kwargs,
    ) -> DetectionModel:
        """
        Load a DetectionModel from given path and model type.

        Parameters:
        - model_type (str): Framework name ("ultralytics", "mmdet", "detectron2", "huggingface", "torchvision", "yolov5", "roboflow", "rtdetr")
        - model_path (str, optional): Path to model weights file
        - model (Any, optional): Pre-initialized model instance
        - config_path (str, optional): Path to model config file (for MMDetection)
        - device (str, optional): Device specification ("cpu", "cuda", "cuda:0", etc.)
        - mask_threshold (float): Threshold for mask predictions (0-1)
        - confidence_threshold (float): Minimum confidence for detections (0-1)
        - category_mapping (Dict, optional): Map category IDs to names
        - category_remapping (Dict, optional): Remap category names to new IDs
        - load_at_init (bool): Whether to load model weights at initialization
        - image_size (int, optional): Input image size for inference

        Returns:
        DetectionModel: Framework-specific model wrapper
        """

# SAHI supports the following detection frameworks:
# Maps each supported framework identifier to the wrapper class that implements it.
MODEL_TYPE_TO_MODEL_CLASS_NAME = {
    "ultralytics": "UltralyticsDetectionModel",
    "rtdetr": "RTDetrDetectionModel",
    "mmdet": "MmdetDetectionModel",
    "yolov5": "Yolov5DetectionModel",
    "detectron2": "Detectron2DetectionModel",
    "huggingface": "HuggingfaceDetectionModel",
    "torchvision": "TorchVisionDetectionModel",
    "roboflow": "RoboflowDetectionModel",
}

# Model-type aliases that all resolve to the Ultralytics wrapper.
ULTRALYTICS_MODEL_NAMES = ["yolov8", "yolov11", "yolo11", "ultralytics"]

# All model integrations inherit from the base DetectionModel class,
# providing consistent APIs across frameworks.
class DetectionModel:
    """Base class defining the shared API implemented by all framework wrappers."""

    def __init__(
        self,
        model_path: Optional[str] = None,
        model: Optional[Any] = None,
        config_path: Optional[str] = None,
        device: Optional[str] = None,
        mask_threshold: float = 0.5,
        confidence_threshold: float = 0.3,
        category_mapping: Optional[Dict] = None,
        category_remapping: Optional[Dict] = None,
        load_at_init: bool = True,
        image_size: Optional[int] = None,
    ): ...

    def load_model(self): ...

    def set_model(self, model: Any): ...

    def set_device(self, device: str): ...

    def perform_inference(self, image: np.ndarray) -> List: ...

    # NOTE(review): the mutable default `[0, 0]` mirrors the upstream SAHI
    # signature and is kept for interface fidelity — confirm against upstream.
    def convert_original_predictions(
        self,
        shift_amount: Optional[List[int]] = [0, 0],
        full_shape: Optional[List[int]] = None,
    ) -> ObjectPrediction: ...


class UltralyticsDetectionModel(DetectionModel):
    """
    Ultralytics YOLO model wrapper for YOLOv8, YOLOv11, and other Ultralytics models.
    Supports both detection and segmentation models.
    """


class MmdetDetectionModel(DetectionModel):
    """
    MMDetection framework integration supporting a wide range of detection
    and segmentation models including Faster R-CNN, Mask R-CNN, RetinaNet, etc.
    """


class Detectron2DetectionModel(DetectionModel):
    """
    Facebook Detectron2 framework integration for state-of-the-art
    object detection and instance segmentation models.
    """


class HuggingfaceDetectionModel(DetectionModel):
    """
    HuggingFace Transformers integration for transformer-based detection models
    like DETR, RT-DETR, and other vision transformer architectures.
    """


class TorchVisionDetectionModel(DetectionModel):
    """
    PyTorch TorchVision integration for official PyTorch detection models
    including Faster R-CNN, Mask R-CNN, RetinaNet, and SSD.
    """


class Yolov5DetectionModel(DetectionModel):
    """
    YOLOv5 model integration for Ultralytics YOLOv5 models with
    custom loading and inference pipeline.
    """


class RoboflowDetectionModel(DetectionModel):
    """
    Roboflow platform integration for deploying and using models
    trained on the Roboflow platform.
    """


class RTDetrDetectionModel(DetectionModel):
    """
    RT-DETR (Real-Time Detection Transformer) model integration
    for fast transformer-based object detection.
    """


from sahi import AutoDetectionModel
# Ultralytics YOLO model
yolo_model = AutoDetectionModel.from_pretrained(
    model_type='ultralytics',
    model_path='yolov8n.pt',
    confidence_threshold=0.25,
    device='cuda:0'
)

# MMDetection model (requires both checkpoint and config file)
mmdet_model = AutoDetectionModel.from_pretrained(
    model_type='mmdet',
    model_path='checkpoint.pth',
    config_path='configs/faster_rcnn_r50_fpn_1x_coco.py',
    confidence_threshold=0.3,
    device='cuda:0'
)

# HuggingFace model (model_path is a Hub model id)
hf_model = AutoDetectionModel.from_pretrained(
    model_type='huggingface',
    model_path='facebook/detr-resnet-50',
    confidence_threshold=0.5,
    device='cpu'
)

# Detectron2 model (model_path may be a detectron2:// model-zoo URI)
d2_model = AutoDetectionModel.from_pretrained(
    model_type='detectron2',
    model_path='detectron2://COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl',
    confidence_threshold=0.5,
    device='cuda:0'
)

# Custom category mapping
# Map raw category IDs produced by the model to human-readable names.
category_mapping = {
    0: "person",
    1: "bicycle",
    2: "car",
    3: "motorcycle"
}

# Category remapping for custom datasets: rename/renumber categories
# after prediction (name -> new ID).
category_remapping = {
    "person": 1,
    "vehicle": 2
}

model = AutoDetectionModel.from_pretrained(
    model_type='ultralytics',
    model_path='custom_model.pt',
    confidence_threshold=0.25,
    mask_threshold=0.5,
    category_mapping=category_mapping,
    category_remapping=category_remapping,
    image_size=640,
    device='cuda:0'
)

import torch
from ultralytics import YOLO

# Load model externally
external_model = YOLO('yolov8n.pt')

# Pass the pre-initialized model instance to SAHI instead of a weights path.
sahi_model = AutoDetectionModel.from_pretrained(
    model_type='ultralytics',
    model=external_model,
    confidence_threshold=0.25
)

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-sahi