A vision library for performing sliced inference on large images/small objects
—
SAHI provides a comprehensive annotation framework with data structures for handling bounding boxes, masks, categories, and complete object annotations. The framework supports multiple format conversions and provides consistent APIs for manipulation across different computer vision tasks.
Immutable dataclass representing rectangular regions with coordinates and optional shift amounts for coordinate transformation.
@dataclass(frozen=True)
class BoundingBox:
box: Union[Tuple[float, float, float, float], List[float]]
shift_amount: Tuple[int, int] = (0, 0)
def __post_init__(self): ...
@property
def minx(self) -> float: ...
@property
def miny(self) -> float: ...
@property
def maxx(self) -> float: ...
@property
def maxy(self) -> float: ...
@property
def area(self) -> float: ...
def get_expanded_box(self, ratio: float = 0.1) -> "BoundingBox":
"""
Return expanded bounding box by specified ratio.
Parameters:
- ratio (float): Expansion ratio (0.1 = 10% expansion)
Returns:
BoundingBox: New expanded bounding box
"""
def to_xywh(self) -> List[float]:
"""
Convert to [xmin, ymin, width, height] format.
Returns:
List[float]: Bounding box in xywh format
"""
def to_coco_bbox(self) -> List[float]:
"""
Convert to COCO format [xmin, ymin, width, height].
Returns:
List[float]: COCO format bounding box
"""
def to_xyxy(self) -> List[float]:
"""
Convert to [xmin, ymin, xmax, ymax] format.
Returns:
List[float]: Bounding box in xyxy format
"""
def to_voc_bbox(self) -> List[int]:
"""
Convert to VOC format [xmin, ymin, xmax, ymax] as integers.
Returns:
List[int]: VOC format bounding box
"""
def get_shifted_box(self) -> "BoundingBox":
"""
Return shifted BoundingBox using the shift_amount.
Returns:
BoundingBox: New shifted bounding box
"""Immutable dataclass for object categories with ID and name fields.
@dataclass(frozen=True)
class Category:
id: Optional[Union[int, str]] = None
name: Optional[str] = None
def __post_init__(self): ...

Segmentation mask class handling COCO format polygon segmentation and boolean masks.
class Mask:
def __init__(
self,
segmentation: List[List[float]],
full_shape: List[int],
shift_amount: list = [0, 0],
):
"""
Initialize mask from COCO segmentation format.
Parameters:
- segmentation (List[List[float]]): COCO format polygon segmentation coordinates
- full_shape (List[int]): Full image dimensions [height, width]
- shift_amount (list): Coordinate shift [shift_x, shift_y]
"""
@property
def bool_mask(self) -> np.ndarray: ...
@property
def segmentation(self) -> List: ...
@property
def area(self) -> int: ...
@classmethod
def from_float_mask(
cls,
mask: np.ndarray,
full_shape: List[int],
mask_threshold: float = 0.5,
shift_amount: list = [0, 0],
) -> "Mask":
"""
Create mask from float numpy array using threshold.
Parameters:
- mask (np.ndarray): Float mask array (0-1 values)
- full_shape (List[int]): Full image dimensions [height, width]
- mask_threshold (float): Threshold for converting to boolean
- shift_amount (list): Coordinate shift [shift_x, shift_y]
Returns:
Mask: New Mask instance
"""
@classmethod
def from_bool_mask(
cls,
bool_mask: np.ndarray,
full_shape: List[int],
shift_amount: list = [0, 0],
) -> "Mask":
"""
Create mask from boolean numpy array.
Parameters:
- bool_mask (np.ndarray): Boolean mask array
- full_shape (List[int]): Full image dimensions [height, width]
- shift_amount (list): Coordinate shift [shift_x, shift_y]
Returns:
Mask: New Mask instance
"""
def get_shifted_mask(self) -> "Mask":
"""
Return shifted mask using shift_amount.
Returns:
Mask: New shifted mask
"""Complete annotation combining bounding box, mask, and category information with extensive format conversion capabilities.
class ObjectAnnotation:
def __init__(
self,
bbox: Optional[BoundingBox] = None,
category: Optional[Category] = None,
mask: Optional[Mask] = None,
shift_amount: Optional[List[int]] = None,
full_shape: Optional[List[int]] = None,
):
"""
Initialize complete object annotation.
Parameters:
- bbox (BoundingBox, optional): Bounding box
- category (Category, optional): Object category
- mask (Mask, optional): Segmentation mask
- shift_amount (List[int], optional): Coordinate shift [x, y]
- full_shape (List[int], optional): Full image shape [height, width]
"""
@property
def area(self) -> Union[int, float]: ...
@classmethod
def from_bool_mask(
cls,
bool_mask: np.ndarray,
full_shape: List[int],
category_id: Optional[int] = None,
category_name: Optional[str] = None,
shift_amount: List[int] = [0, 0],
) -> "ObjectAnnotation":
"""
Create annotation from boolean mask.
Parameters:
- bool_mask (np.ndarray): Boolean segmentation mask
- full_shape (List[int]): Full image dimensions [height, width]
- category_id (int, optional): Category ID
- category_name (str, optional): Category name
- shift_amount (List[int]): Coordinate shift
Returns:
ObjectAnnotation: New annotation instance
"""
@classmethod
def from_coco_segmentation(
cls,
segmentation: List,
full_shape: List[int],
category_id: Optional[int] = None,
category_name: Optional[str] = None,
shift_amount: List[int] = [0, 0],
) -> "ObjectAnnotation":
"""
Create annotation from COCO segmentation format.
Parameters:
- segmentation (List): COCO format polygon segmentation
- full_shape (List[int]): Full image dimensions
- category_id (int, optional): Category ID
- category_name (str, optional): Category name
- shift_amount (List[int]): Coordinate shift
Returns:
ObjectAnnotation: New annotation instance
"""
@classmethod
def from_coco_bbox(
cls,
bbox: List[Union[int, float]],
category_id: Optional[int] = None,
category_name: Optional[str] = None,
shift_amount: List[int] = [0, 0],
) -> "ObjectAnnotation":
"""
Create annotation from COCO bounding box format.
Parameters:
- bbox (List): COCO format bbox [x, y, width, height]
- category_id (int, optional): Category ID
- category_name (str, optional): Category name
- shift_amount (List[int]): Coordinate shift
Returns:
ObjectAnnotation: New annotation instance
"""
@classmethod
def from_coco_annotation_dict(
cls,
annotation_dict: Dict,
full_shape: List[int],
shift_amount: List[int] = [0, 0],
) -> "ObjectAnnotation":
"""
Create annotation from COCO annotation dictionary.
Parameters:
- annotation_dict (Dict): COCO annotation dictionary
- full_shape (List[int]): Full image dimensions
- shift_amount (List[int]): Coordinate shift
Returns:
ObjectAnnotation: New annotation instance
"""
def to_coco_annotation(self) -> "CocoAnnotation":
"""Convert to CocoAnnotation format."""
def to_coco_prediction(self) -> "CocoPrediction":
"""Convert to CocoPrediction format."""
def to_shapely_annotation(self) -> "ShapelyAnnotation":
"""Convert to Shapely annotation format."""
def to_imantics_annotation(self):
"""Convert to Imantics annotation format."""
def deepcopy(self) -> "ObjectAnnotation":
"""Return deep copy of annotation."""
def get_shifted_object_annotation(self) -> "ObjectAnnotation":
"""Return shifted annotation using shift_amount."""Object detection prediction with confidence score, inheriting from ObjectAnnotation with additional prediction-specific methods.
class ObjectPrediction(ObjectAnnotation):
def __init__(
self,
bbox: Optional[List[int]] = None,
category_id: Optional[int] = None,
category_name: Optional[str] = None,
segmentation: Optional[List[List[float]]] = None,
score: float = 0.0,
shift_amount: Optional[List[int]] = [0, 0],
full_shape: Optional[List[int]] = None,
):
"""
Initialize object prediction with confidence score.
Parameters:
- bbox (List[int], optional): Bounding box coordinates [minx, miny, maxx, maxy]
- category_id (int, optional): Category ID
- category_name (str, optional): Category name
- segmentation (List[List[float]], optional): COCO format polygon segmentation
- score (float): Confidence score between 0 and 1
- shift_amount (List[int], optional): Coordinate shift [shift_x, shift_y]
- full_shape (List[int], optional): Full image dimensions [height, width]
"""
def get_shifted_object_prediction(self) -> "ObjectPrediction":
"""
Return shifted prediction for full image coordinate mapping.
Returns:
ObjectPrediction: New shifted prediction
"""
def to_coco_prediction(self) -> "CocoPrediction":
"""
Convert to COCO prediction format.
Returns:
CocoPrediction: COCO format prediction
"""
def to_fiftyone_detection(self):
"""
Convert to FiftyOne detection format.
Returns:
FiftyOne Detection object
"""Wrapper for prediction confidence scores with comparison operations.
class PredictionScore:
def __init__(self, value: Union[float, np.ndarray]):
"""
Initialize prediction score.
Parameters:
- value: Confidence score between 0 and 1
"""
@property
def value(self) -> float: ...
def is_greater_than_threshold(self, threshold: float) -> bool:
"""
Check if score exceeds threshold.
Parameters:
- threshold (float): Threshold value
Returns:
bool: True if score > threshold
"""
def __eq__(self, threshold: float) -> bool: ...
def __gt__(self, threshold: float) -> bool: ...
def __lt__(self, threshold: float) -> bool: ...

Container for prediction results with image data and export capabilities.
class PredictionResult:
def __init__(
self,
object_prediction_list: List[ObjectPrediction],
image: Image.Image,
durations_in_seconds: Optional[Dict] = None,
):
"""
Initialize prediction result container.
Parameters:
- object_prediction_list: List of predictions
- image: Original PIL Image
- durations_in_seconds: Timing profiling data
"""
@property
def object_prediction_list(self) -> List[ObjectPrediction]: ...
@property
def image(self) -> Image.Image: ...
def export_visuals(self, export_dir: str, text_size: float = None):
"""
Export visualization images to directory.
Parameters:
- export_dir (str): Output directory path
- text_size (float, optional): Text size for labels
"""
def to_coco_annotations(self) -> List["CocoAnnotation"]:
"""Convert predictions to COCO annotation list."""
def to_coco_predictions(self) -> List["CocoPrediction"]:
"""Convert to COCO prediction list."""
def to_imantics_annotations(self) -> List:
"""Convert to Imantics annotation list."""
def to_fiftyone_detections(self) -> List:
"""Convert to FiftyOne detection list."""from sahi import BoundingBox, Category, Mask, ObjectAnnotation
import numpy as np
# Create bounding box
bbox = BoundingBox(box=[10, 20, 100, 80])
print(f"Area: {bbox.area}")
print(f"COCO format: {bbox.to_coco_bbox()}")
# Create category
category = Category(id=1, name="person")
# Create mask from boolean array
bool_mask = np.random.rand(100, 100) > 0.5
mask = Mask.from_bool_mask(bool_mask, full_shape=[100, 100])
# Create complete annotation
annotation = ObjectAnnotation(
bbox=bbox,
category=category,
mask=mask
)

# Create bbox with shift amount for coordinate mapping
bbox = BoundingBox(
box=[50, 60, 150, 160],
shift_amount=(100, 100)
)
# Get shifted coordinates
shifted_bbox = bbox.get_shifted_box()
print(f"Original: {bbox.to_xyxy()}")
print(f"Shifted: {shifted_bbox.to_xyxy()}")
# Expand bounding box
expanded = bbox.get_expanded_box(ratio=0.2)  # 20% expansion

from sahi.annotation import ObjectAnnotation
# Create from COCO format
coco_bbox = [10, 20, 50, 60] # [x, y, width, height]
annotation = ObjectAnnotation.from_coco_bbox(
bbox=coco_bbox,
category_id=1,
category_name="person"
)
# Convert to different formats
coco_annotation = annotation.to_coco_annotation()
shapely_annotation = annotation.to_shapely_annotation()
# Work with different coordinate systems
voc_bbox = annotation.bbox.to_voc_bbox() # [xmin, ymin, xmax, ymax]
xyxy_bbox = annotation.bbox.to_xyxy()  # [xmin, ymin, xmax, ymax] as floats

from sahi.prediction import ObjectPrediction, PredictionScore
# Create prediction with confidence (the float score is wrapped internally as a PredictionScore)
prediction = ObjectPrediction(
    bbox=[10, 20, 100, 80],
    category_id=0,
    category_name="person",
    score=0.85
)
# Check confidence threshold
if prediction.score.is_greater_than_threshold(0.5):
print("High confidence detection")
# Convert to different output formats
coco_pred = prediction.to_coco_prediction()
fiftyone_det = prediction.to_fiftyone_detection()

Install with Tessl CLI
npx tessl i tessl/pypi-sahi