Tessl Tile for pypi/sahi@0.11.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

annotation-framework.md cli.md coco-integration.md image-slicing.md index.md model-integration.md postprocessing.md prediction-functions.md utilities.md

docs/annotation-framework.md

0
# Annotation Framework
1

2
SAHI provides a comprehensive annotation framework with data structures for handling bounding boxes, masks, categories, and complete object annotations. The framework supports multiple format conversions and provides consistent APIs for manipulation across different computer vision tasks.
3

4
## Capabilities
5

6
### BoundingBox
7

8
Immutable dataclass representing rectangular regions with coordinates and optional shift amounts for coordinate transformation.
9

10
```python { .api }
11
@dataclass(frozen=True)
12
class BoundingBox:
13
    box: Union[Tuple[float, float, float, float], List[float]]
14
    shift_amount: Tuple[int, int] = (0, 0)
15
    
16
    def __post_init__(self): ...
17
    
18
    @property
19
    def minx(self) -> float: ...
20
    
21
    @property  
22
    def miny(self) -> float: ...
23
    
24
    @property
25
    def maxx(self) -> float: ...
26
    
27
    @property
28
    def maxy(self) -> float: ...
29
    
30
    @property
31
    def area(self) -> float: ...
32
    
33
    def get_expanded_box(self, ratio: float = 0.1) -> "BoundingBox":
34
        """
35
        Return expanded bounding box by specified ratio.
36
        
37
        Parameters:
38
        - ratio (float): Expansion ratio (0.1 = 10% expansion)
39
        
40
        Returns:
41
        BoundingBox: New expanded bounding box
42
        """
43
    
44
    def to_xywh(self) -> List[float]:
45
        """
46
        Convert to [xmin, ymin, width, height] format.
47
        
48
        Returns:
49
        List[float]: Bounding box in xywh format
50
        """
51
    
52
    def to_coco_bbox(self) -> List[float]:
53
        """
54
        Convert to COCO format [xmin, ymin, width, height].
55
        
56
        Returns:
57
        List[float]: COCO format bounding box
58
        """
59
    
60
    def to_xyxy(self) -> List[float]:
61
        """
62
        Convert to [xmin, ymin, xmax, ymax] format.
63
        
64
        Returns:
65
        List[float]: Bounding box in xyxy format
66
        """
67
    
68
    def to_voc_bbox(self) -> List[int]:
69
        """
70
        Convert to VOC format [xmin, ymin, xmax, ymax] as integers.
71
        
72
        Returns:
73
        List[int]: VOC format bounding box
74
        """
75
    
76
    def get_shifted_box(self) -> "BoundingBox":
77
        """
78
        Return shifted BoundingBox using the shift_amount.
79
        
80
        Returns:
81
        BoundingBox: New shifted bounding box
82
        """
83
```
84

85
### Category
86

87
Immutable dataclass for object categories with ID and name fields.
88

89
```python { .api }
90
@dataclass(frozen=True)
91
class Category:
92
    id: Optional[Union[int, str]] = None
93
    name: Optional[str] = None
94
    
95
    def __post_init__(self): ...
96
```
97

98
### Mask
99

100
Segmentation mask class handling COCO format polygon segmentation and boolean masks.
101

102
```python { .api }
103
class Mask:
104
    def __init__(
105
        self,
106
        segmentation: List[List[float]],
107
        full_shape: List[int],
108
        shift_amount: list = [0, 0],
109
    ):
110
        """
111
        Initialize mask from COCO segmentation format.
112
        
113
        Parameters:
114
        - segmentation (List[List[float]]): COCO format polygon segmentation coordinates
115
        - full_shape (List[int]): Full image dimensions [height, width]
116
        - shift_amount (list): Coordinate shift [shift_x, shift_y]
117
        """
118
    
119
    @property
120
    def bool_mask(self) -> np.ndarray: ...
121
    
122
    @property  
123
    def segmentation(self) -> List: ...
124
    
125
    @property
126
    def area(self) -> int: ...
127
    
128
    @classmethod
129
    def from_float_mask(
130
        cls,
131
        mask: np.ndarray,
132
        full_shape: List[int],
133
        mask_threshold: float = 0.5,
134
        shift_amount: list = [0, 0],
135
    ) -> "Mask":
136
        """
137
        Create mask from float numpy array using threshold.
138
        
139
        Parameters:
140
        - mask (np.ndarray): Float mask array (0-1 values)
141
        - full_shape (List[int]): Full image dimensions [height, width]
142
        - mask_threshold (float): Threshold for converting to boolean
143
        - shift_amount (list): Coordinate shift [shift_x, shift_y]
144
        
145
        Returns:
146
        Mask: New Mask instance
147
        """
148
    
149
    @classmethod
150
    def from_bool_mask(
151
        cls,
152
        bool_mask: np.ndarray,
153
        full_shape: List[int],
154
        shift_amount: list = [0, 0],
155
    ) -> "Mask":
156
        """
157
        Create mask from boolean numpy array.
158
        
159
        Parameters:
160
        - bool_mask (np.ndarray): Boolean mask array
161
        - full_shape (List[int]): Full image dimensions [height, width]
162
        - shift_amount (list): Coordinate shift [shift_x, shift_y]
163
        
164
        Returns:
165
        Mask: New Mask instance
166
        """
167
    
168
    def get_shifted_mask(self) -> "Mask":
169
        """
170
        Return shifted mask using shift_amount.
171
        
172
        Returns:
173
        Mask: New shifted mask
174
        """
175
```
176

177
### ObjectAnnotation
178

179
Complete annotation combining bounding box, mask, and category information with extensive format conversion capabilities.
180

181
```python { .api }
182
class ObjectAnnotation:
183
    def __init__(
184
        self,
185
        bbox: Optional[BoundingBox] = None,
186
        category: Optional[Category] = None, 
187
        mask: Optional[Mask] = None,
188
        shift_amount: Optional[List[int]] = None,
189
        full_shape: Optional[List[int]] = None,
190
    ):
191
        """
192
        Initialize complete object annotation.
193
        
194
        Parameters:
195
        - bbox (BoundingBox, optional): Bounding box
196
        - category (Category, optional): Object category
197
        - mask (Mask, optional): Segmentation mask
198
        - shift_amount (List[int], optional): Coordinate shift [x, y]
199
        - full_shape (List[int], optional): Full image shape [height, width]
200
        """
201
    
202
    @property
203
    def area(self) -> Union[int, float]: ...
204
    
205
    @classmethod
206
    def from_bool_mask(
207
        cls,
208
        bool_mask: np.ndarray,
209
        full_shape: List[int],
210
        category_id: Optional[int] = None,
211
        category_name: Optional[str] = None,
212
        shift_amount: List[int] = [0, 0],
213
    ) -> "ObjectAnnotation":
214
        """
215
        Create annotation from boolean mask.
216
        
217
        Parameters:
218
        - bool_mask (np.ndarray): Boolean segmentation mask
219
        - full_shape (List[int]): Full image dimensions [height, width]
220
        - category_id (int, optional): Category ID
221
        - category_name (str, optional): Category name
222
        - shift_amount (List[int]): Coordinate shift
223
        
224
        Returns:
225
        ObjectAnnotation: New annotation instance
226
        """
227
    
228
    @classmethod
229
    def from_coco_segmentation(
230
        cls,
231
        segmentation: List,
232
        full_shape: List[int],
233
        category_id: Optional[int] = None,
234
        category_name: Optional[str] = None,
235
        shift_amount: List[int] = [0, 0],
236
    ) -> "ObjectAnnotation":
237
        """
238
        Create annotation from COCO segmentation format.
239
        
240
        Parameters:
241
        - segmentation (List): COCO format polygon segmentation
242
        - full_shape (List[int]): Full image dimensions [height, width]
243
        - category_id (int, optional): Category ID
244
        - category_name (str, optional): Category name
245
        - shift_amount (List[int]): Coordinate shift
246
        
247
        Returns:
248
        ObjectAnnotation: New annotation instance
249
        """
250
    
251
    @classmethod
252
    def from_coco_bbox(
253
        cls,
254
        bbox: List[Union[int, float]],
255
        category_id: Optional[int] = None,
256
        category_name: Optional[str] = None,
257
        shift_amount: List[int] = [0, 0],
258
    ) -> "ObjectAnnotation":
259
        """
260
        Create annotation from COCO bounding box format.
261
        
262
        Parameters:
263
        - bbox (List): COCO format bbox [x, y, width, height]
264
        - category_id (int, optional): Category ID
265
        - category_name (str, optional): Category name
266
        - shift_amount (List[int]): Coordinate shift
267
        
268
        Returns:
269
        ObjectAnnotation: New annotation instance
270
        """
271
    
272
    @classmethod
273
    def from_coco_annotation_dict(
274
        cls,
275
        annotation_dict: Dict,
276
        full_shape: List[int],
277
        shift_amount: List[int] = [0, 0],
278
    ) -> "ObjectAnnotation":
279
        """
280
        Create annotation from COCO annotation dictionary.
281
        
282
        Parameters:
283
        - annotation_dict (Dict): COCO annotation dictionary
284
        - full_shape (List[int]): Full image dimensions [height, width]
285
        - shift_amount (List[int]): Coordinate shift
286
        
287
        Returns:
288
        ObjectAnnotation: New annotation instance
289
        """
290
    
291
    def to_coco_annotation(self) -> "CocoAnnotation":
292
        """Convert to CocoAnnotation format."""
293
    
294
    def to_coco_prediction(self) -> "CocoPrediction":
295
        """Convert to CocoPrediction format."""
296
    
297
    def to_shapely_annotation(self) -> "ShapelyAnnotation":
298
        """Convert to Shapely annotation format."""
299
    
300
    def to_imantics_annotation(self):
301
        """Convert to Imantics annotation format."""
302
    
303
    def deepcopy(self) -> "ObjectAnnotation":
304
        """Return deep copy of annotation."""
305
    
306
    def get_shifted_object_annotation(self) -> "ObjectAnnotation":
307
        """Return shifted annotation using shift_amount."""
308
```
309

310
### ObjectPrediction
311

312
Object detection prediction with confidence score, inheriting from ObjectAnnotation with additional prediction-specific methods.
313

314
```python { .api }
315
class ObjectPrediction(ObjectAnnotation):
316
    def __init__(
317
        self,
318
        bbox: Optional[List[int]] = None,
319
        category_id: Optional[int] = None,
320
        category_name: Optional[str] = None,
321
        segmentation: Optional[List[List[float]]] = None,
322
        score: float = 0.0,
323
        shift_amount: Optional[List[int]] = [0, 0],
324
        full_shape: Optional[List[int]] = None,
325
    ):
326
        """
327
        Initialize object prediction with confidence score.
328
        
329
        Parameters:
330
        - bbox (List[int], optional): Bounding box coordinates [minx, miny, maxx, maxy]
331
        - category_id (int, optional): Category ID
332
        - category_name (str, optional): Category name
333
        - segmentation (List[List[float]], optional): COCO format polygon segmentation
334
        - score (float): Confidence score between 0 and 1
335
        - shift_amount (List[int], optional): Coordinate shift [shift_x, shift_y]
336
        - full_shape (List[int], optional): Full image dimensions [height, width]
337
        """
338
    
339
    def get_shifted_object_prediction(self) -> "ObjectPrediction":
340
        """
341
        Return shifted prediction for full image coordinate mapping.
342
        
343
        Returns:
344
        ObjectPrediction: New shifted prediction
345
        """
346
    
347
    def to_coco_prediction(self) -> "CocoPrediction":
348
        """
349
        Convert to COCO prediction format.
350
        
351
        Returns:
352
        CocoPrediction: COCO format prediction
353
        """
354
    
355
    def to_fiftyone_detection(self):
356
        """
357
        Convert to FiftyOne detection format.
358
        
359
        Returns:
360
        FiftyOne Detection object
361
        """
362
```
363

364
### PredictionScore
365

366
Wrapper for prediction confidence scores with comparison operations.
367

368
```python { .api }
369
class PredictionScore:
370
    def __init__(self, value: Union[float, np.ndarray]):
371
        """
372
        Initialize prediction score.
373
        
374
        Parameters:
375
        - value (Union[float, np.ndarray]): Confidence score between 0 and 1
376
        """
377
    
378
    @property
379
    def value(self) -> float: ...
380
    
381
    def is_greater_than_threshold(self, threshold: float) -> bool:
382
        """
383
        Check if score exceeds threshold.
384
        
385
        Parameters:
386
        - threshold (float): Threshold value
387
        
388
        Returns:
389
        bool: True if score > threshold
390
        """
391
    
392
    def __eq__(self, threshold: float) -> bool: ...
393
    def __gt__(self, threshold: float) -> bool: ...
394
    def __lt__(self, threshold: float) -> bool: ...
395
```
396

397
### PredictionResult
398

399
Container for prediction results with image data and export capabilities.
400

401
```python { .api }
402
class PredictionResult:
403
    def __init__(
404
        self,
405
        object_prediction_list: List[ObjectPrediction],
406
        image: Image.Image,
407
        durations_in_seconds: Optional[Dict] = None,
408
    ):
409
        """
410
        Initialize prediction result container.
411
        
412
        Parameters:
413
        - object_prediction_list (List[ObjectPrediction]): List of predictions
413
        - image (Image.Image): Original PIL Image
414
        - durations_in_seconds (Dict, optional): Timing profiling data
416
        """
417
    
418
    @property
419
    def object_prediction_list(self) -> List[ObjectPrediction]: ...
420
    
421
    @property
422
    def image(self) -> Image.Image: ...
423
    
424
    def export_visuals(self, export_dir: str, text_size: Optional[float] = None):
425
        """
426
        Export visualization images to directory.
427
        
428
        Parameters:
429
        - export_dir (str): Output directory path
430
        - text_size (float, optional): Text size for labels
431
        """
432
    
433
    def to_coco_annotations(self) -> List["CocoAnnotation"]:
434
        """Convert predictions to COCO annotation list."""
435
    
436
    def to_coco_predictions(self) -> List["CocoPrediction"]:
437
        """Convert to COCO prediction list."""
438
    
439
    def to_imantics_annotations(self) -> List:
440
        """Convert to Imantics annotation list."""
441
    
442
    def to_fiftyone_detections(self) -> List:
443
        """Convert to FiftyOne detection list."""
444
```
445

446
## Usage Examples
447

448
### Creating Annotations
449

450
```python
451
from sahi.annotation import BoundingBox, Category, Mask, ObjectAnnotation
452
import numpy as np
453

454
# Create bounding box
455
bbox = BoundingBox(box=[10, 20, 100, 80])
456
print(f"Area: {bbox.area}")
457
print(f"COCO format: {bbox.to_coco_bbox()}")
458

459
# Create category
460
category = Category(id=1, name="person")
461

462
# Create mask from boolean array
463
bool_mask = np.random.rand(100, 100) > 0.5
464
mask = Mask.from_bool_mask(bool_mask, full_shape=[100, 100])
465

466
# Create complete annotation
467
annotation = ObjectAnnotation(
468
    bbox=bbox,
469
    category=category,
470
    mask=mask
471
)
472
```
473

474
### Coordinate Transformations
475

476
```python
477
# Create bbox with shift amount for coordinate mapping
478
bbox = BoundingBox(
479
    box=[50, 60, 150, 160], 
480
    shift_amount=(100, 100)
481
)
482

483
# Get shifted coordinates
484
shifted_bbox = bbox.get_shifted_box()
485
print(f"Original: {bbox.to_xyxy()}")
486
print(f"Shifted: {shifted_bbox.to_xyxy()}")
487

488
# Expand bounding box
489
expanded = bbox.get_expanded_box(ratio=0.2)  # 20% expansion
490
```
491

492
### Format Conversions
493

494
```python
495
from sahi.annotation import ObjectAnnotation
496

497
# Create from COCO format
498
coco_bbox = [10, 20, 50, 60]  # [x, y, width, height]
499
annotation = ObjectAnnotation.from_coco_bbox(
500
    bbox=coco_bbox,
501
    category_id=1,
502
    category_name="person"
503
)
504

505
# Convert to different formats
506
coco_annotation = annotation.to_coco_annotation()
507
shapely_annotation = annotation.to_shapely_annotation()
508

509
# Work with different coordinate systems
510
voc_bbox = annotation.bbox.to_voc_bbox()  # [xmin, ymin, xmax, ymax] as integers
511
xyxy_bbox = annotation.bbox.to_xyxy()     # [xmin, ymin, xmax, ymax] as floats
512
```
513

514
### Working with Predictions
515

516
```python
517
from sahi.prediction import ObjectPrediction, PredictionScore
518

519
# Create prediction with confidence
520
score = PredictionScore(0.85)
521
prediction = ObjectPrediction(
522
    bbox=[10, 20, 100, 80],  # [minx, miny, maxx, maxy]
523
    category_id=0, category_name="person",
524
    score=score.value
525
)
526

527
# Check confidence threshold
528
if prediction.score.is_greater_than_threshold(0.5):
529
    print("High confidence detection")
530

531
# Convert to different output formats
532
coco_pred = prediction.to_coco_prediction()
533
fiftyone_det = prediction.to_fiftyone_detection()
534
```

Version

Tile

Files

docs/annotation-framework.md

docs/annotation-framework.md