A vision library for performing sliced inference on large images/small objects
—
SAHI provides a comprehensive annotation framework with data structures for handling bounding boxes, masks, categories, and complete object annotations. The framework supports multiple format conversions and provides consistent APIs for manipulation across different computer vision tasks.
Immutable dataclass representing rectangular regions with coordinates and optional shift amounts for coordinate transformation.
@dataclass(frozen=True)
class BoundingBox:
box: Union[Tuple[float, float, float, float], List[float]]
shift_amount: Tuple[int, int] = (0, 0)
def __post_init__(self): ...
@property
def minx(self) -> float: ...
@property
def miny(self) -> float: ...
@property
def maxx(self) -> float: ...
@property
def maxy(self) -> float: ...
@property
def area(self) -> float: ...
def get_expanded_box(self, ratio: float = 0.1) -> "BoundingBox":
"""
Return expanded bounding box by specified ratio.
Parameters:
- ratio (float): Expansion ratio (0.1 = 10% expansion)
Returns:
BoundingBox: New expanded bounding box
"""
def to_xywh(self) -> List[float]:
"""
Convert to [xmin, ymin, width, height] format.
Returns:
List[float]: Bounding box in xywh format
"""
def to_coco_bbox(self) -> List[float]:
"""
Convert to COCO format [xmin, ymin, width, height].
Returns:
List[float]: COCO format bounding box
"""
def to_xyxy(self) -> List[float]:
"""
Convert to [xmin, ymin, xmax, ymax] format.
Returns:
List[float]: Bounding box in xyxy format
"""
def to_voc_bbox(self) -> List[int]:
"""
Convert to VOC format [xmin, ymin, xmax, ymax] as integers.
Returns:
List[int]: VOC format bounding box
"""
def get_shifted_box(self) -> "BoundingBox":
"""
Return shifted BoundingBox using the shift_amount.
Returns:
BoundingBox: New shifted bounding box
"""Immutable dataclass for object categories with ID and name fields.
@dataclass(frozen=True)
class Category:
id: Optional[Union[int, str]] = None
name: Optional[str] = None
def __post_init__(self): ...

Segmentation mask class handling COCO format polygon segmentation and boolean masks.
class Mask:
def __init__(
self,
segmentation: List[List[float]],
full_shape: List[int],
shift_amount: list = [0, 0],
):
"""
Initialize mask from COCO segmentation format.
Parameters:
- segmentation (List[List[float]]): COCO format polygon segmentation coordinates
- full_shape (List[int]): Full image dimensions [height, width]
- shift_amount (list): Coordinate shift [shift_x, shift_y]
"""
@property
def bool_mask(self) -> np.ndarray: ...
@property
def segmentation(self) -> List: ...
@property
def area(self) -> int: ...
@classmethod
def from_float_mask(
cls,
mask: np.ndarray,
full_shape: List[int],
mask_threshold: float = 0.5,
shift_amount: list = [0, 0],
) -> "Mask":
"""
Create mask from float numpy array using threshold.
Parameters:
- mask (np.ndarray): Float mask array (0-1 values)
- full_shape (List[int]): Full image dimensions [height, width]
- mask_threshold (float): Threshold for converting to boolean
- shift_amount (list): Coordinate shift [shift_x, shift_y]
Returns:
Mask: New Mask instance
"""
@classmethod
def from_bool_mask(
cls,
bool_mask: np.ndarray,
full_shape: List[int],
shift_amount: list = [0, 0],
) -> "Mask":
"""
Create mask from boolean numpy array.
Parameters:
- bool_mask (np.ndarray): Boolean mask array
- full_shape (List[int]): Full image dimensions [height, width]
- shift_amount (list): Coordinate shift [shift_x, shift_y]
Returns:
Mask: New Mask instance
"""
def get_shifted_mask(self) -> "Mask":
"""
Return shifted mask using shift_amount.
Returns:
Mask: New shifted mask
"""Complete annotation combining bounding box, mask, and category information with extensive format conversion capabilities.
class ObjectAnnotation:
def __init__(
self,
bbox: Optional[BoundingBox] = None,
category: Optional[Category] = None,
mask: Optional[Mask] = None,
shift_amount: Optional[List[int]] = None,
full_shape: Optional[List[int]] = None,
):
"""
Initialize complete object annotation.
Parameters:
- bbox (BoundingBox, optional): Bounding box
- category (Category, optional): Object category
- mask (Mask, optional): Segmentation mask
- shift_amount (List[int], optional): Coordinate shift [x, y]
- full_shape (List[int], optional): Full image shape [height, width]
"""
@property
def area(self) -> Union[int, float]: ...
@classmethod
def from_bool_mask(
cls,
bool_mask: np.ndarray,
full_shape: List[int],
category_id: Optional[int] = None,
category_name: Optional[str] = None,
shift_amount: List[int] = [0, 0],
) -> "ObjectAnnotation":
"""
Create annotation from boolean mask.
Parameters:
- bool_mask (np.ndarray): Boolean segmentation mask
- full_shape (List[int]): Full image dimensions [height, width]
- category_id (int, optional): Category ID
- category_name (str, optional): Category name
- shift_amount (List[int]): Coordinate shift
Returns:
ObjectAnnotation: New annotation instance
"""
@classmethod
def from_coco_segmentation(
cls,
segmentation: List,
full_shape: List[int],
category_id: Optional[int] = None,
category_name: Optional[str] = None,
shift_amount: List[int] = [0, 0],
) -> "ObjectAnnotation":
"""
Create annotation from COCO segmentation format.
Parameters:
- segmentation (List): COCO format polygon segmentation
- full_shape (List[int]): Full image dimensions
- category_id (int, optional): Category ID
- category_name (str, optional): Category name
- shift_amount (List[int]): Coordinate shift
Returns:
ObjectAnnotation: New annotation instance
"""
@classmethod
def from_coco_bbox(
cls,
bbox: List[Union[int, float]],
category_id: Optional[int] = None,
category_name: Optional[str] = None,
shift_amount: List[int] = [0, 0],
) -> "ObjectAnnotation":
"""
Create annotation from COCO bounding box format.
Parameters:
- bbox (List): COCO format bbox [x, y, width, height]
- category_id (int, optional): Category ID
- category_name (str, optional): Category name
- shift_amount (List[int]): Coordinate shift
Returns:
ObjectAnnotation: New annotation instance
"""
@classmethod
def from_coco_annotation_dict(
cls,
annotation_dict: Dict,
full_shape: List[int],
shift_amount: List[int] = [0, 0],
) -> "ObjectAnnotation":
"""
Create annotation from COCO annotation dictionary.
Parameters:
- annotation_dict (Dict): COCO annotation dictionary
- full_shape (List[int]): Full image dimensions
- shift_amount (List[int]): Coordinate shift
Returns:
ObjectAnnotation: New annotation instance
"""
def to_coco_annotation(self) -> "CocoAnnotation":
"""Convert to CocoAnnotation format."""
def to_coco_prediction(self) -> "CocoPrediction":
"""Convert to CocoPrediction format."""
def to_shapely_annotation(self) -> "ShapelyAnnotation":
"""Convert to Shapely annotation format."""
def to_imantics_annotation(self):
"""Convert to Imantics annotation format."""
def deepcopy(self) -> "ObjectAnnotation":
"""Return deep copy of annotation."""
def get_shifted_object_annotation(self) -> "ObjectAnnotation":
"""Return shifted annotation using shift_amount."""Object detection prediction with confidence score, inheriting from ObjectAnnotation with additional prediction-specific methods.
class ObjectPrediction(ObjectAnnotation):
def __init__(
self,
bbox: Optional[List[int]] = None,
category_id: Optional[int] = None,
category_name: Optional[str] = None,
segmentation: Optional[List[List[float]]] = None,
score: float = 0.0,
shift_amount: Optional[List[int]] = [0, 0],
full_shape: Optional[List[int]] = None,
):
"""
Initialize object prediction with confidence score.
Parameters:
- bbox (List[int], optional): Bounding box coordinates [minx, miny, maxx, maxy]
- category_id (int, optional): Category ID
- category_name (str, optional): Category name
- segmentation (List[List[float]], optional): COCO format polygon segmentation
- score (float): Confidence score between 0 and 1
- shift_amount (List[int], optional): Coordinate shift [shift_x, shift_y]
- full_shape (List[int], optional): Full image dimensions [height, width]
"""
def get_shifted_object_prediction(self) -> "ObjectPrediction":
"""
Return shifted prediction for full image coordinate mapping.
Returns:
ObjectPrediction: New shifted prediction
"""
def to_coco_prediction(self) -> "CocoPrediction":
"""
Convert to COCO prediction format.
Returns:
CocoPrediction: COCO format prediction
"""
def to_fiftyone_detection(self):
"""
Convert to FiftyOne detection format.
Returns:
FiftyOne Detection object
"""Wrapper for prediction confidence scores with comparison operations.
class PredictionScore:
def __init__(self, value: Union[float, np.ndarray]):
"""
Initialize prediction score.
Parameters:
- value: Confidence score between 0 and 1
"""
@property
def value(self) -> float: ...
def is_greater_than_threshold(self, threshold: float) -> bool:
"""
Check if score exceeds threshold.
Parameters:
- threshold (float): Threshold value
Returns:
bool: True if score > threshold
"""
def __eq__(self, threshold: float) -> bool: ...
def __gt__(self, threshold: float) -> bool: ...
def __lt__(self, threshold: float) -> bool: ...

Container for prediction results with image data and export capabilities.
class PredictionResult:
def __init__(
self,
object_prediction_list: List[ObjectPrediction],
image: Image.Image,
durations_in_seconds: Optional[Dict] = None,
):
"""
Initialize prediction result container.
Parameters:
- object_prediction_list: List of predictions
- image: Original PIL Image
- durations_in_seconds: Timing profiling data
"""
@property
def object_prediction_list(self) -> List[ObjectPrediction]: ...
@property
def image(self) -> Image.Image: ...
def export_visuals(self, export_dir: str, text_size: float = None):
"""
Export visualization images to directory.
Parameters:
- export_dir (str): Output directory path
- text_size (float, optional): Text size for labels
"""
def to_coco_annotations(self) -> List["CocoAnnotation"]:
"""Convert predictions to COCO annotation list."""
def to_coco_predictions(self) -> List["CocoPrediction"]:
"""Convert to COCO prediction list."""
def to_imantics_annotations(self) -> List:
"""Convert to Imantics annotation list."""
def to_fiftyone_detections(self) -> List:
"""Convert to FiftyOne detection list."""from sahi import BoundingBox, Category, Mask, ObjectAnnotation
import numpy as np
# Create bounding box
bbox = BoundingBox(box=[10, 20, 100, 80])
print(f"Area: {bbox.area}")
print(f"COCO format: {bbox.to_coco_bbox()}")
# Create category
category = Category(id=1, name="person")
# Create mask from boolean array
bool_mask = np.random.rand(100, 100) > 0.5
mask = Mask.from_bool_mask(bool_mask, full_shape=[100, 100])
# Create complete annotation
annotation = ObjectAnnotation(
bbox=bbox,
category=category,
mask=mask
)

# Create bbox with shift amount for coordinate mapping
bbox = BoundingBox(
box=[50, 60, 150, 160],
shift_amount=(100, 100)
)
# Get shifted coordinates
shifted_bbox = bbox.get_shifted_box()
print(f"Original: {bbox.to_xyxy()}")
print(f"Shifted: {shifted_bbox.to_xyxy()}")
# Expand bounding box
expanded = bbox.get_expanded_box(ratio=0.2)  # 20% expansion

from sahi.annotation import ObjectAnnotation
# Create from COCO format
coco_bbox = [10, 20, 50, 60] # [x, y, width, height]
annotation = ObjectAnnotation.from_coco_bbox(
bbox=coco_bbox,
category_id=1,
category_name="person"
)
# Convert to different formats
coco_annotation = annotation.to_coco_annotation()
shapely_annotation = annotation.to_shapely_annotation()
# Work with different coordinate systems
voc_bbox = annotation.bbox.to_voc_bbox() # [xmin, ymin, xmax, ymax]
xyxy_bbox = annotation.bbox.to_xyxy()  # [xmin, ymin, xmax, ymax] as floats

from sahi.prediction import ObjectPrediction, PredictionScore
# Create prediction with confidence (the float score is wrapped internally as a PredictionScore)
prediction = ObjectPrediction(
    bbox=[10, 20, 100, 80],
    category_id=0,
    category_name="person",
    score=0.85
)
# Check confidence threshold
if prediction.score.is_greater_than_threshold(0.5):
print("High confidence detection")
# Convert to different output formats
coco_pred = prediction.to_coco_prediction()
fiftyone_det = prediction.to_fiftyone_detection()

Install with Tessl CLI
npx tessl i tessl/pypi-sahi