A vision library for performing sliced inference on large images / small objects.

SAHI provides comprehensive COCO dataset integration, including loading, manipulation, annotation processing, evaluation, and format conversion capabilities. The framework supports the complete COCO ecosystem, with seamless integration for both images and videos.

Main class for loading and manipulating COCO format datasets with full annotation support.
class Coco:
    """COCO-format dataset: holds images, annotations, and categories.

    Can be loaded from a COCO JSON file or built up incrementally via the
    add_* methods.
    """

    def __init__(self, coco_path: Optional[str] = None):
        """
        Initialize COCO dataset from JSON file.

        Parameters:
        - coco_path (str, optional): Path to COCO format JSON file.
          When omitted, an empty dataset is created.
        """

    @property
    def json(self) -> Dict: ...

    @property
    def images(self) -> List[CocoImage]: ...

    @property
    def annotations(self) -> List[CocoAnnotation]: ...

    @property
    def categories(self) -> List[CocoCategory]: ...

    def add_image(self, coco_image: CocoImage) -> int:
        """
        Add image to dataset.

        Parameters:
        - coco_image: CocoImage instance

        Returns:
        int: Assigned image ID
        """

    def add_annotation(self, coco_annotation: CocoAnnotation) -> int:
        """
        Add annotation to dataset.

        Parameters:
        - coco_annotation: CocoAnnotation instance

        Returns:
        int: Assigned annotation ID
        """

    def add_category(self, coco_category: CocoCategory) -> int:
        """
        Add category to dataset.

        Parameters:
        - coco_category: CocoCategory instance

        Returns:
        int: Assigned category ID
        """

    def merge(self, coco2: "Coco") -> "Coco":
        """
        Merge with another COCO dataset.

        Parameters:
        - coco2: Another Coco instance to merge

        Returns:
        Coco: New merged dataset
        """

    def export_as_yolo(
        self,
        output_dir: str,
        train_split_rate: float = 1.0,
        numpy_seed: int = 0,
        mp: bool = True,
    ):
        """
        Export dataset in YOLO format.

        Parameters:
        - output_dir (str): Output directory for YOLO files
        - train_split_rate (float): Fraction for training set (0-1)
        - numpy_seed (int): Random seed for reproducible splits
        - mp (bool): Use multiprocessing for faster conversion
        """

    def save(self, save_path: str):
        """Save COCO dataset to JSON file."""

    def get_image_list(self) -> List[CocoImage]:
        """Get list of all images in dataset."""

    def get_category_list(self) -> List[CocoCategory]:
        """Get list of all categories in dataset."""

Represents individual images in COCO datasets with annotation management.
class CocoImage:
    """Individual image in a COCO dataset, with its attached annotations."""

    def __init__(
        self,
        image_path: str,
        image_id: Optional[int] = None
    ):
        """
        Initialize COCO image.

        Parameters:
        - image_path (str): Path to image file
        - image_id (int, optional): Unique image identifier
        """

    @property
    def json(self) -> Dict: ...

    @property
    def annotations(self) -> List[CocoAnnotation]: ...

    @property
    def image_path(self) -> str: ...

    @property
    def file_name(self) -> str: ...

    def add_annotation(self, annotation: CocoAnnotation):
        """
        Add annotation to this image.

        Parameters:
        - annotation: CocoAnnotation instance
        """

    def get_annotation_list(self) -> List[CocoAnnotation]:
        """Get all annotations for this image."""

Represents object annotations with bounding boxes and segmentation masks.
class CocoAnnotation:
    """Single object annotation: bounding box and/or segmentation mask."""

    def __init__(
        self,
        bbox: Optional[List[int]] = None,
        category_id: Optional[int] = None,
        category_name: Optional[str] = None,
        iscrowd: int = 0,
        area: Optional[int] = None,
        segmentation: Optional[List] = None,
        image_id: Optional[int] = None,
        annotation_id: Optional[int] = None,
    ):
        """
        Initialize COCO annotation.

        Parameters:
        - bbox (List[int], optional): Bounding box [x, y, width, height]
        - category_id (int, optional): Category identifier
        - category_name (str, optional): Category name
        - iscrowd (int): Whether annotation represents crowd (0 or 1)
        - area (int, optional): Annotation area in pixels
        - segmentation (List, optional): Polygon segmentation
        - image_id (int, optional): Associated image ID
        - annotation_id (int, optional): Unique annotation ID
        """

    @property
    def json(self) -> Dict: ...

    @property
    def area(self) -> int: ...

    @property
    def bbox(self) -> List[int]: ...

    def update_bbox_stats(self):
        """Update bounding box statistics from segmentation."""

Represents model predictions in COCO format with confidence scores.
class CocoPrediction:
    """Model prediction in COCO format, carrying a confidence score."""

    def __init__(
        self,
        bbox: List[int],
        category_id: int,
        score: float,
        category_name: Optional[str] = None,
        segmentation: Optional[List] = None,
        image_id: Optional[int] = None,
    ):
        """
        Initialize COCO prediction.

        Parameters:
        - bbox (List[int]): Bounding box [x, y, width, height]
        - category_id (int): Predicted category ID
        - score (float): Confidence score (0-1)
        - category_name (str, optional): Category name
        - segmentation (List, optional): Predicted segmentation mask
        - image_id (int, optional): Associated image ID
        """

    @property
    def json(self) -> Dict: ...

    @property
    def score(self) -> float: ...

    @property
    def category_id(self) -> int: ...

    @property
    def bbox(self) -> List[int]: ...

Represents object categories in COCO datasets.
class CocoCategory:
    """Object category (label) in a COCO dataset."""

    def __init__(
        self,
        category_id: int,
        name: str,
        supercategory: str = ""
    ):
        """
        Initialize COCO category.

        Parameters:
        - category_id (int): Unique category identifier
        - name (str): Category name
        - supercategory (str): Parent category name
        """

    @property
    def json(self) -> Dict: ...

    @property
    def id(self) -> int: ...

    @property
    def name(self) -> str: ...

Support for COCO-style video datasets with temporal annotations.
class CocoVideo:
    """COCO-style video entry with temporal annotation support."""

    def __init__(
        self,
        video_path: str,
        video_id: Optional[int] = None
    ):
        """
        Initialize COCO video.

        Parameters:
        - video_path (str): Path to video file
        - video_id (int, optional): Unique video identifier
        """
class CocoVidImage(CocoImage):
    """COCO video frame with temporal information."""

class CocoVidAnnotation(CocoAnnotation):
    """COCO video annotation with track information."""

class CocoVid(Coco):
    """COCO video dataset with temporal support."""

def create_coco_dict() -> Dict:
    """
    Create empty COCO format dictionary structure.

    Returns:
    Dict: Empty COCO dictionary with required fields
    """
def merge(coco1: Coco, coco2: Coco) -> Coco:
    """
    Merge two COCO datasets.

    Parameters:
    - coco1: First COCO dataset
    - coco2: Second COCO dataset

    Returns:
    Coco: Merged dataset
    """
def export_coco_as_yolo(
    coco_path: str,
    output_dir: str,
    train_split_rate: float = 1.0,
    numpy_seed: int = 0,
) -> str:
    """
    Convert COCO dataset to YOLO format.

    Parameters:
    - coco_path (str): Path to COCO JSON file
    - output_dir (str): Output directory for YOLO files
    - train_split_rate (float): Training set split ratio
    - numpy_seed (int): Random seed for reproducible splits

    Returns:
    str: Path to output directory
    """

class DatasetClassCounts:
    """Dataset statistics and per-class distribution helpers."""

    def __init__(self, coco: Coco):
        """
        Calculate dataset statistics and class distributions.

        Parameters:
        - coco: COCO dataset instance
        """

    @property
    def stats(self) -> Dict: ...

    def get_class_distribution(self) -> Dict[str, int]:
        """Get distribution of annotations per class."""

    def get_image_distribution(self) -> Dict[str, int]:
        """Get distribution of images per class."""

from sahi.utils.coco import Coco, CocoImage, CocoAnnotation, CocoCategory
# Load existing COCO dataset
coco = Coco("dataset/annotations.json")
print(f"Dataset contains:")
print(f" Images: {len(coco.images)}")
print(f" Annotations: {len(coco.annotations)}")
print(f" Categories: {len(coco.categories)}")

# Access images and annotations
for image in coco.images[:5]:  # First 5 images
    print(f"Image: {image.file_name}")
    print(f" Annotations: {len(image.annotations)}")
    for annotation in image.annotations:
        print(f" Category: {annotation.category_name}")
        print(f" BBox: {annotation.bbox}")

from sahi.utils.coco import Coco, CocoImage, CocoAnnotation, CocoCategory
# Create new empty dataset
coco = Coco()

# Add categories
person_cat = CocoCategory(category_id=1, name="person")
car_cat = CocoCategory(category_id=2, name="car")
coco.add_category(person_cat)
coco.add_category(car_cat)

# Add image
image = CocoImage(image_path="images/sample.jpg", image_id=1)

# Add annotations
annotation1 = CocoAnnotation(
    bbox=[100, 100, 50, 80],  # [x, y, width, height]
    category_id=1,
    category_name="person",
    image_id=1,
    annotation_id=1
)
annotation2 = CocoAnnotation(
    bbox=[200, 150, 120, 60],
    category_id=2,
    category_name="car",
    image_id=1,
    annotation_id=2
)
image.add_annotation(annotation1)
image.add_annotation(annotation2)
coco.add_image(image)

# Save dataset
coco.save("new_dataset.json")

from sahi.utils.coco import export_coco_as_yolo
# Convert COCO to YOLO format
output_path = export_coco_as_yolo(
    coco_path="dataset/annotations.json",
    output_dir="yolo_dataset/",
    train_split_rate=0.8,  # 80% for training, 20% for validation
    numpy_seed=42  # For reproducible splits
)
print(f"YOLO dataset created at: {output_path}")

from sahi.utils.coco import Coco
# Load multiple datasets
dataset1 = Coco("dataset1/annotations.json")
dataset2 = Coco("dataset2/annotations.json")
dataset3 = Coco("dataset3/annotations.json")

# Merge datasets
merged = dataset1.merge(dataset2).merge(dataset3)
print(f"Merged dataset contains:")
print(f" Images: {len(merged.images)}")
print(f" Annotations: {len(merged.annotations)}")

# Save merged dataset
merged.save("merged_dataset.json")

from sahi.utils.coco import CocoPrediction
from sahi import get_sliced_prediction

# Get predictions from SAHI
result = get_sliced_prediction(
    image="test_image.jpg",
    detection_model=model
)

# Convert to COCO predictions
coco_predictions = result.to_coco_predictions()

# Access prediction details
for pred in coco_predictions:
    print(f"Category: {pred.category_name}")
    print(f"Confidence: {pred.score}")
    print(f"BBox: {pred.bbox}")

# Save predictions to JSON
import json
predictions_dict = [pred.json for pred in coco_predictions]
with open("predictions.json", "w") as f:
    json.dump(predictions_dict, f)

from sahi.utils.coco import Coco, DatasetClassCounts
# Load dataset
coco = Coco("dataset/annotations.json")

# Analyze class distribution
stats = DatasetClassCounts(coco)
class_dist = stats.get_class_distribution()
print("Class distribution:")
for class_name, count in class_dist.items():
    print(f" {class_name}: {count} annotations")

# Get images per class
image_dist = stats.get_image_distribution()
print("\nImages per class:")
for class_name, count in image_dist.items():
    print(f" {class_name}: {count} images")

from sahi.utils.coco import Coco
# Load dataset
coco = Coco("large_dataset.json")

# Filter dataset by category
person_images = []
for image in coco.images:
    has_person = any(
        ann.category_name == "person"
        for ann in image.annotations
    )
    if has_person:
        person_images.append(image)

print(f"Found {len(person_images)} images with people")

# Create subset dataset
subset_coco = Coco()
for image in person_images[:100]:  # First 100 images with people
    subset_coco.add_image(image)
    for annotation in image.annotations:
        subset_coco.add_annotation(annotation)

# Add categories
for category in coco.categories:
    subset_coco.add_category(category)

subset_coco.save("person_subset.json")

Install with Tessl CLI
npx tessl i tessl/pypi-sahi