0
# Annotation Framework
1
2
SAHI provides a comprehensive annotation framework with data structures for handling bounding boxes, masks, categories, and complete object annotations. The framework supports multiple format conversions and provides consistent APIs for manipulation across different computer vision tasks.
3
4
## Capabilities
5
6
### BoundingBox
7
8
Immutable dataclass representing rectangular regions with coordinates and optional shift amounts for coordinate transformation.
9
10
```python { .api }
11
@dataclass(frozen=True)
12
class BoundingBox:
13
box: Union[Tuple[float, float, float, float], List[float]]
14
shift_amount: Tuple[int, int] = (0, 0)
15
16
def __post_init__(self): ...
17
18
@property
19
def minx(self) -> float: ...
20
21
@property
22
def miny(self) -> float: ...
23
24
@property
25
def maxx(self) -> float: ...
26
27
@property
28
def maxy(self) -> float: ...
29
30
@property
31
def area(self) -> float: ...
32
33
def get_expanded_box(self, ratio: float = 0.1) -> "BoundingBox":
34
"""
35
Return expanded bounding box by specified ratio.
36
37
Parameters:
38
- ratio (float): Expansion ratio (0.1 = 10% expansion)
39
40
Returns:
41
BoundingBox: New expanded bounding box
42
"""
43
44
def to_xywh(self) -> List[float]:
45
"""
46
Convert to [xmin, ymin, width, height] format.
47
48
Returns:
49
List[float]: Bounding box in xywh format
50
"""
51
52
def to_coco_bbox(self) -> List[float]:
53
"""
54
Convert to COCO format [xmin, ymin, width, height].
55
56
Returns:
57
List[float]: COCO format bounding box
58
"""
59
60
def to_xyxy(self) -> List[float]:
61
"""
62
Convert to [xmin, ymin, xmax, ymax] format.
63
64
Returns:
65
List[float]: Bounding box in xyxy format
66
"""
67
68
def to_voc_bbox(self) -> List[int]:
69
"""
70
Convert to VOC format [xmin, ymin, xmax, ymax] as integers.
71
72
Returns:
73
List[int]: VOC format bounding box
74
"""
75
76
def get_shifted_box(self) -> "BoundingBox":
77
"""
78
Return shifted BoundingBox using the shift_amount.
79
80
Returns:
81
BoundingBox: New shifted bounding box
82
"""
83
```
84
85
### Category
86
87
Immutable dataclass for object categories with ID and name fields.
88
89
```python { .api }
90
@dataclass(frozen=True)
91
class Category:
92
id: Optional[Union[int, str]] = None
93
name: Optional[str] = None
94
95
def __post_init__(self): ...
96
```
97
98
### Mask
99
100
Segmentation mask class handling COCO format polygon segmentation and boolean masks.
101
102
```python { .api }
103
class Mask:
104
def __init__(
105
self,
106
segmentation: List[List[float]],
107
full_shape: List[int],
108
shift_amount: list = [0, 0],
109
):
110
"""
111
Initialize mask from COCO segmentation format.
112
113
Parameters:
114
- segmentation (List[List[float]]): COCO format polygon segmentation coordinates
115
- full_shape (List[int]): Full image dimensions [height, width]
116
- shift_amount (list): Coordinate shift [shift_x, shift_y]
117
"""
118
119
@property
120
def bool_mask(self) -> np.ndarray: ...
121
122
@property
123
def segmentation(self) -> List: ...
124
125
@property
126
def area(self) -> int: ...
127
128
@classmethod
129
def from_float_mask(
130
cls,
131
mask: np.ndarray,
132
full_shape: List[int],
133
mask_threshold: float = 0.5,
134
shift_amount: list = [0, 0],
135
) -> "Mask":
136
"""
137
Create mask from float numpy array using threshold.
138
139
Parameters:
140
- mask (np.ndarray): Float mask array (0-1 values)
141
- full_shape (List[int]): Full image dimensions [height, width]
142
- mask_threshold (float): Threshold for converting to boolean
143
- shift_amount (list): Coordinate shift [shift_x, shift_y]
144
145
Returns:
146
Mask: New Mask instance
147
"""
148
149
@classmethod
150
def from_bool_mask(
151
cls,
152
bool_mask: np.ndarray,
153
full_shape: List[int],
154
shift_amount: list = [0, 0],
155
) -> "Mask":
156
"""
157
Create mask from boolean numpy array.
158
159
Parameters:
160
- bool_mask (np.ndarray): Boolean mask array
161
- full_shape (List[int]): Full image dimensions [height, width]
162
- shift_amount (list): Coordinate shift [shift_x, shift_y]
163
164
Returns:
165
Mask: New Mask instance
166
"""
167
168
def get_shifted_mask(self) -> "Mask":
169
"""
170
Return shifted mask using shift_amount.
171
172
Returns:
173
Mask: New shifted mask
174
"""
175
```
176
177
### ObjectAnnotation
178
179
Complete annotation combining bounding box, mask, and category information with extensive format conversion capabilities.
180
181
```python { .api }
182
class ObjectAnnotation:
183
def __init__(
184
self,
185
bbox: Optional[BoundingBox] = None,
186
category: Optional[Category] = None,
187
mask: Optional[Mask] = None,
188
shift_amount: Optional[List[int]] = None,
189
full_shape: Optional[List[int]] = None,
190
):
191
"""
192
Initialize complete object annotation.
193
194
Parameters:
195
- bbox (BoundingBox, optional): Bounding box
196
- category (Category, optional): Object category
197
- mask (Mask, optional): Segmentation mask
198
- shift_amount (List[int], optional): Coordinate shift [shift_x, shift_y]
199
- full_shape (List[int], optional): Full image shape [height, width]
200
"""
201
202
@property
203
def area(self) -> Union[int, float]: ...
204
205
@classmethod
206
def from_bool_mask(
207
cls,
208
bool_mask: np.ndarray,
209
full_shape: List[int],
210
category_id: Optional[int] = None,
211
category_name: Optional[str] = None,
212
shift_amount: List[int] = [0, 0],
213
) -> "ObjectAnnotation":
214
"""
215
Create annotation from boolean mask.
216
217
Parameters:
218
- bool_mask (np.ndarray): Boolean segmentation mask
219
- full_shape (List[int]): Full image dimensions [height, width]
220
- category_id (int, optional): Category ID
221
- category_name (str, optional): Category name
222
- shift_amount (List[int]): Coordinate shift
223
224
Returns:
225
ObjectAnnotation: New annotation instance
226
"""
227
228
@classmethod
229
def from_coco_segmentation(
230
cls,
231
segmentation: List,
232
full_shape: List[int],
233
category_id: Optional[int] = None,
234
category_name: Optional[str] = None,
235
shift_amount: List[int] = [0, 0],
236
) -> "ObjectAnnotation":
237
"""
238
Create annotation from COCO segmentation format.
239
240
Parameters:
241
- segmentation (List): COCO format polygon segmentation
242
- full_shape (List[int]): Full image dimensions [height, width]
243
- category_id (int, optional): Category ID
244
- category_name (str, optional): Category name
245
- shift_amount (List[int]): Coordinate shift
246
247
Returns:
248
ObjectAnnotation: New annotation instance
249
"""
250
251
@classmethod
252
def from_coco_bbox(
253
cls,
254
bbox: List[Union[int, float]],
255
category_id: Optional[int] = None,
256
category_name: Optional[str] = None,
257
shift_amount: List[int] = [0, 0],
258
) -> "ObjectAnnotation":
259
"""
260
Create annotation from COCO bounding box format.
261
262
Parameters:
263
- bbox (List): COCO format bbox [x, y, width, height]
264
- category_id (int, optional): Category ID
265
- category_name (str, optional): Category name
266
- shift_amount (List[int]): Coordinate shift
267
268
Returns:
269
ObjectAnnotation: New annotation instance
270
"""
271
272
@classmethod
273
def from_coco_annotation_dict(
274
cls,
275
annotation_dict: Dict,
276
full_shape: List[int],
277
shift_amount: List[int] = [0, 0],
278
) -> "ObjectAnnotation":
279
"""
280
Create annotation from COCO annotation dictionary.
281
282
Parameters:
283
- annotation_dict (Dict): COCO annotation dictionary
284
- full_shape (List[int]): Full image dimensions [height, width]
285
- shift_amount (List[int]): Coordinate shift
286
287
Returns:
288
ObjectAnnotation: New annotation instance
289
"""
290
291
def to_coco_annotation(self) -> "CocoAnnotation":
292
"""Convert to CocoAnnotation format."""
293
294
def to_coco_prediction(self) -> "CocoPrediction":
295
"""Convert to CocoPrediction format."""
296
297
def to_shapely_annotation(self) -> "ShapelyAnnotation":
298
"""Convert to Shapely annotation format."""
299
300
def to_imantics_annotation(self):
301
"""Convert to Imantics annotation format."""
302
303
def deepcopy(self) -> "ObjectAnnotation":
304
"""Return deep copy of annotation."""
305
306
def get_shifted_object_annotation(self) -> "ObjectAnnotation":
307
"""Return shifted annotation using shift_amount."""
308
```
309
310
### ObjectPrediction
311
312
Object detection prediction with confidence score, inheriting from ObjectAnnotation with additional prediction-specific methods.
313
314
```python { .api }
315
class ObjectPrediction(ObjectAnnotation):
316
def __init__(
317
self,
318
bbox: Optional[List[int]] = None,
319
category_id: Optional[int] = None,
320
category_name: Optional[str] = None,
321
segmentation: Optional[List[List[float]]] = None,
322
score: float = 0.0,
323
shift_amount: Optional[List[int]] = [0, 0],
324
full_shape: Optional[List[int]] = None,
325
):
326
"""
327
Initialize object prediction with confidence score.
328
329
Parameters:
330
- bbox (List[int], optional): Bounding box coordinates [minx, miny, maxx, maxy]
331
- category_id (int, optional): Category ID
332
- category_name (str, optional): Category name
333
- segmentation (List[List[float]], optional): COCO format polygon segmentation
334
- score (float): Confidence score between 0 and 1
335
- shift_amount (List[int], optional): Coordinate shift [shift_x, shift_y]
336
- full_shape (List[int], optional): Full image dimensions [height, width]
337
"""
338
339
def get_shifted_object_prediction(self) -> "ObjectPrediction":
340
"""
341
Return shifted prediction for full image coordinate mapping.
342
343
Returns:
344
ObjectPrediction: New shifted prediction
345
"""
346
347
def to_coco_prediction(self) -> "CocoPrediction":
348
"""
349
Convert to COCO prediction format.
350
351
Returns:
352
CocoPrediction: COCO format prediction
353
"""
354
355
def to_fiftyone_detection(self):
356
"""
357
Convert to FiftyOne detection format.
358
359
Returns:
360
FiftyOne Detection object
361
"""
362
```
363
364
### PredictionScore
365
366
Wrapper for prediction confidence scores with comparison operations.
367
368
```python { .api }
369
class PredictionScore:
370
def __init__(self, value: Union[float, np.ndarray]):
371
"""
372
Initialize prediction score.
373
374
Parameters:
375
- value (Union[float, np.ndarray]): Confidence score between 0 and 1
376
"""
377
378
@property
379
def value(self) -> float: ...
380
381
def is_greater_than_threshold(self, threshold: float) -> bool:
382
"""
383
Check if score exceeds threshold.
384
385
Parameters:
386
- threshold (float): Threshold value
387
388
Returns:
389
bool: True if score > threshold
390
"""
391
392
def __eq__(self, threshold: float) -> bool: ...
393
def __gt__(self, threshold: float) -> bool: ...
394
def __lt__(self, threshold: float) -> bool: ...
395
```
396
397
### PredictionResult
398
399
Container for prediction results with image data and export capabilities.
400
401
```python { .api }
402
class PredictionResult:
403
def __init__(
404
self,
405
object_prediction_list: List[ObjectPrediction],
406
image: Image.Image,
407
durations_in_seconds: Optional[Dict] = None,
408
):
409
"""
410
Initialize prediction result container.
411
412
Parameters:
413
- object_prediction_list (List[ObjectPrediction]): List of predictions
414
- image (Image.Image): Original PIL Image
415
- durations_in_seconds (Dict, optional): Timing profiling data
416
"""
417
418
@property
419
def object_prediction_list(self) -> List[ObjectPrediction]: ...
420
421
@property
422
def image(self) -> Image.Image: ...
423
424
def export_visuals(self, export_dir: str, text_size: Optional[float] = None):
425
"""
426
Export visualization images to directory.
427
428
Parameters:
429
- export_dir (str): Output directory path
430
- text_size (float, optional): Text size for labels
431
"""
432
433
def to_coco_annotations(self) -> List["CocoAnnotation"]:
434
"""Convert predictions to COCO annotation list."""
435
436
def to_coco_predictions(self) -> List["CocoPrediction"]:
437
"""Convert to COCO prediction list."""
438
439
def to_imantics_annotations(self) -> List:
440
"""Convert to Imantics annotation list."""
441
442
def to_fiftyone_detections(self) -> List:
443
"""Convert to FiftyOne detection list."""
444
```
445
446
## Usage Examples
447
448
### Creating Annotations
449
450
```python
451
from sahi.annotation import BoundingBox, Category, Mask, ObjectAnnotation
452
import numpy as np
453
454
# Create bounding box
455
bbox = BoundingBox(box=[10, 20, 100, 80])
456
print(f"Area: {bbox.area}")
457
print(f"COCO format: {bbox.to_coco_bbox()}")
458
459
# Create category
460
category = Category(id=1, name="person")
461
462
# Create mask from boolean array
463
bool_mask = np.random.rand(100, 100) > 0.5
464
mask = Mask.from_bool_mask(bool_mask, full_shape=[100, 100])
465
466
# Create complete annotation
467
annotation = ObjectAnnotation(
468
bbox=bbox,
469
category=category,
470
mask=mask
471
)
472
```
473
474
### Coordinate Transformations
475
476
```python
477
# Create bbox with shift amount for coordinate mapping
478
bbox = BoundingBox(
479
box=[50, 60, 150, 160],
480
shift_amount=(100, 100)
481
)
482
483
# Get shifted coordinates
484
shifted_bbox = bbox.get_shifted_box()
485
print(f"Original: {bbox.to_xyxy()}")
486
print(f"Shifted: {shifted_bbox.to_xyxy()}")
487
488
# Expand bounding box
489
expanded = bbox.get_expanded_box(ratio=0.2) # 20% expansion
490
```
491
492
### Format Conversions
493
494
```python
495
from sahi.annotation import ObjectAnnotation
496
497
# Create from COCO format
498
coco_bbox = [10, 20, 50, 60] # [x, y, width, height]
499
annotation = ObjectAnnotation.from_coco_bbox(
500
bbox=coco_bbox,
501
category_id=1,
502
category_name="person"
503
)
504
505
# Convert to different formats
506
coco_annotation = annotation.to_coco_annotation()
507
shapely_annotation = annotation.to_shapely_annotation()
508
509
# Work with different coordinate systems
510
voc_bbox = annotation.bbox.to_voc_bbox() # [xmin, ymin, xmax, ymax] as integers
511
xyxy_bbox = annotation.bbox.to_xyxy() # [xmin, ymin, xmax, ymax] as floats
512
```
513
514
### Working with Predictions
515
516
```python
517
from sahi.prediction import ObjectPrediction, PredictionScore
518
519
# Create prediction with confidence
520
score = PredictionScore(0.85)
521
prediction = ObjectPrediction(
522
bbox=[10, 20, 100, 80],
523
category_id=0, category_name="person",
524
score=score.value
525
)
526
527
# Check confidence threshold
528
if prediction.score.is_greater_than_threshold(0.5):
529
print("High confidence detection")
530
531
# Convert to different output formats
532
coco_pred = prediction.to_coco_prediction()
533
fiftyone_det = prediction.to_fiftyone_detection()
534
```