or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

annotation-framework.md · cli.md · coco-integration.md · image-slicing.md · index.md · model-integration.md · postprocessing.md · prediction-functions.md · utilities.md

docs/annotation-framework.md

0

# Annotation Framework

1

2

SAHI provides a comprehensive annotation framework with data structures for handling bounding boxes, masks, categories, and complete object annotations. The framework supports multiple format conversions and provides consistent APIs for manipulation across different computer vision tasks.

3

4

## Capabilities

5

6

### BoundingBox

7

8

Immutable dataclass representing rectangular regions with coordinates and optional shift amounts for coordinate transformation.

9

10

```python { .api }

11

@dataclass(frozen=True)

12

class BoundingBox:

13

box: Union[Tuple[float, float, float, float], List[float]]

14

shift_amount: Tuple[int, int] = (0, 0)

15

16

def __post_init__(self): ...

17

18

@property

19

def minx(self) -> float: ...

20

21

@property

22

def miny(self) -> float: ...

23

24

@property

25

def maxx(self) -> float: ...

26

27

@property

28

def maxy(self) -> float: ...

29

30

@property

31

def area(self) -> float: ...

32

33

def get_expanded_box(self, ratio: float = 0.1) -> "BoundingBox":

34

"""

35

Return expanded bounding box by specified ratio.

36

37

Parameters:

38

- ratio (float): Expansion ratio (0.1 = 10% expansion)

39

40

Returns:

41

BoundingBox: New expanded bounding box

42

"""

43

44

def to_xywh(self) -> List[float]:

45

"""

46

Convert to [xmin, ymin, width, height] format.

47

48

Returns:

49

List[float]: Bounding box in xywh format

50

"""

51

52

def to_coco_bbox(self) -> List[float]:

53

"""

54

Convert to COCO format [xmin, ymin, width, height].

55

56

Returns:

57

List[float]: COCO format bounding box

58

"""

59

60

def to_xyxy(self) -> List[float]:

61

"""

62

Convert to [xmin, ymin, xmax, ymax] format.

63

64

Returns:

65

List[float]: Bounding box in xyxy format

66

"""

67

68

def to_voc_bbox(self) -> List[int]:

69

"""

70

Convert to VOC format [xmin, ymin, xmax, ymax] as integers.

71

72

Returns:

73

List[int]: VOC format bounding box

74

"""

75

76

def get_shifted_box(self) -> "BoundingBox":

77

"""

78

Return shifted BoundingBox using the shift_amount.

79

80

Returns:

81

BoundingBox: New shifted bounding box

82

"""

83

```

84

85

### Category

86

87

Immutable dataclass for object categories with ID and name fields.

88

89

```python { .api }

90

@dataclass(frozen=True)

91

class Category:

92

id: Optional[Union[int, str]] = None

93

name: Optional[str] = None

94

95

def __post_init__(self): ...

96

```

97

98

### Mask

99

100

Segmentation mask class handling COCO format polygon segmentation and boolean masks.

101

102

```python { .api }

103

class Mask:

104

def __init__(

105

self,

106

segmentation: List[List[float]],

107

full_shape: List[int],

108

shift_amount: list = [0, 0],

109

):

110

"""

111

Initialize mask from COCO segmentation format.

112

113

Parameters:

114

- segmentation (List[List[float]]): COCO format polygon segmentation coordinates

115

- full_shape (List[int]): Full image dimensions [height, width]

116

- shift_amount (list): Coordinate shift [shift_x, shift_y]

117

"""

118

119

@property

120

def bool_mask(self) -> np.ndarray: ...

121

122

@property

123

def segmentation(self) -> List: ...

124

125

@property

126

def area(self) -> int: ...

127

128

@classmethod

129

def from_float_mask(

130

cls,

131

mask: np.ndarray,

132

full_shape: List[int],

133

mask_threshold: float = 0.5,

134

shift_amount: list = [0, 0],

135

) -> "Mask":

136

"""

137

Create mask from float numpy array using threshold.

138

139

Parameters:

140

- mask (np.ndarray): Float mask array (0-1 values)

141

- full_shape (List[int]): Full image dimensions [height, width]

142

- mask_threshold (float): Threshold for converting to boolean

143

- shift_amount (list): Coordinate shift [shift_x, shift_y]

144

145

Returns:

146

Mask: New Mask instance

147

"""

148

149

@classmethod

150

def from_bool_mask(

151

cls,

152

bool_mask: np.ndarray,

153

full_shape: List[int],

154

shift_amount: list = [0, 0],

155

) -> "Mask":

156

"""

157

Create mask from boolean numpy array.

158

159

Parameters:

160

- bool_mask (np.ndarray): Boolean mask array

161

- full_shape (List[int]): Full image dimensions [height, width]

162

- shift_amount (list): Coordinate shift [shift_x, shift_y]

163

164

Returns:

165

Mask: New Mask instance

166

"""

167

168

def get_shifted_mask(self) -> "Mask":

169

"""

170

Return shifted mask using shift_amount.

171

172

Returns:

173

Mask: New shifted mask

174

"""

175

```

176

177

### ObjectAnnotation

178

179

Complete annotation combining bounding box, mask, and category information with extensive format conversion capabilities.

180

181

```python { .api }

182

class ObjectAnnotation:

183

def __init__(

184

self,

185

bbox: Optional[BoundingBox] = None,

186

category: Optional[Category] = None,

187

mask: Optional[Mask] = None,

188

shift_amount: Optional[List[int]] = None,

189

full_shape: Optional[List[int]] = None,

190

):

191

"""

192

Initialize complete object annotation.

193

194

Parameters:

195

- bbox (BoundingBox, optional): Bounding box

196

- category (Category, optional): Object category

197

- mask (Mask, optional): Segmentation mask

198

- shift_amount (List[int], optional): Coordinate shift [x, y]

199

- full_shape (List[int], optional): Full image shape [height, width]

200

"""

201

202

@property

203

def area(self) -> Union[int, float]: ...

204

205

@classmethod

206

def from_bool_mask(

207

cls,

208

bool_mask: np.ndarray,

209

full_shape: List[int],

210

category_id: Optional[int] = None,

211

category_name: Optional[str] = None,

212

shift_amount: List[int] = [0, 0],

213

) -> "ObjectAnnotation":

214

"""

215

Create annotation from boolean mask.

216

217

Parameters:

218

- bool_mask (np.ndarray): Boolean segmentation mask

219

- full_shape (List[int]): Full image dimensions [height, width]

220

- category_id (int, optional): Category ID

221

- category_name (str, optional): Category name

222

- shift_amount (List[int]): Coordinate shift

223

224

Returns:

225

ObjectAnnotation: New annotation instance

226

"""

227

228

@classmethod

229

def from_coco_segmentation(

230

cls,

231

segmentation: List,

232

full_shape: List[int],

233

category_id: Optional[int] = None,

234

category_name: Optional[str] = None,

235

shift_amount: List[int] = [0, 0],

236

) -> "ObjectAnnotation":

237

"""

238

Create annotation from COCO segmentation format.

239

240

Parameters:

241

- segmentation (List): COCO format polygon segmentation

242

- full_shape (List[int]): Full image dimensions

243

- category_id (int, optional): Category ID

244

- category_name (str, optional): Category name

245

- shift_amount (List[int]): Coordinate shift

246

247

Returns:

248

ObjectAnnotation: New annotation instance

249

"""

250

251

@classmethod

252

def from_coco_bbox(

253

cls,

254

bbox: List[Union[int, float]],

255

category_id: Optional[int] = None,

256

category_name: Optional[str] = None,

257

shift_amount: List[int] = [0, 0],

258

) -> "ObjectAnnotation":

259

"""

260

Create annotation from COCO bounding box format.

261

262

Parameters:

263

- bbox (List): COCO format bbox [x, y, width, height]

264

- category_id (int, optional): Category ID

265

- category_name (str, optional): Category name

266

- shift_amount (List[int]): Coordinate shift

267

268

Returns:

269

ObjectAnnotation: New annotation instance

270

"""

271

272

@classmethod

273

def from_coco_annotation_dict(

274

cls,

275

annotation_dict: Dict,

276

full_shape: List[int],

277

shift_amount: List[int] = [0, 0],

278

) -> "ObjectAnnotation":

279

"""

280

Create annotation from COCO annotation dictionary.

281

282

Parameters:

283

- annotation_dict (Dict): COCO annotation dictionary

284

- full_shape (List[int]): Full image dimensions

285

- shift_amount (List[int]): Coordinate shift

286

287

Returns:

288

ObjectAnnotation: New annotation instance

289

"""

290

291

def to_coco_annotation(self) -> "CocoAnnotation":

292

"""Convert to CocoAnnotation format."""

293

294

def to_coco_prediction(self) -> "CocoPrediction":

295

"""Convert to CocoPrediction format."""

296

297

def to_shapely_annotation(self) -> "ShapelyAnnotation":

298

"""Convert to Shapely annotation format."""

299

300

def to_imantics_annotation(self):

301

"""Convert to Imantics annotation format."""

302

303

def deepcopy(self) -> "ObjectAnnotation":

304

"""Return deep copy of annotation."""

305

306

def get_shifted_object_annotation(self) -> "ObjectAnnotation":

307

"""Return shifted annotation using shift_amount."""

308

```

309

310

### ObjectPrediction

311

312

Object detection prediction with confidence score, inheriting from ObjectAnnotation with additional prediction-specific methods.

313

314

```python { .api }

315

class ObjectPrediction(ObjectAnnotation):

316

def __init__(

317

self,

318

bbox: Optional[List[int]] = None,

319

category_id: Optional[int] = None,

320

category_name: Optional[str] = None,

321

segmentation: Optional[List[List[float]]] = None,

322

score: float = 0.0,

323

shift_amount: Optional[List[int]] = [0, 0],

324

full_shape: Optional[List[int]] = None,

325

):

326

"""

327

Initialize object prediction with confidence score.

328

329

Parameters:

330

- bbox (List[int], optional): Bounding box coordinates [minx, miny, maxx, maxy]

331

- category_id (int, optional): Category ID

332

- category_name (str, optional): Category name

333

- segmentation (List[List[float]], optional): COCO format polygon segmentation

334

- score (float): Confidence score between 0 and 1

335

- shift_amount (List[int], optional): Coordinate shift [shift_x, shift_y]

336

- full_shape (List[int], optional): Full image dimensions [height, width]

337

"""

338

339

def get_shifted_object_prediction(self) -> "ObjectPrediction":

340

"""

341

Return shifted prediction for full image coordinate mapping.

342

343

Returns:

344

ObjectPrediction: New shifted prediction

345

"""

346

347

def to_coco_prediction(self) -> "CocoPrediction":

348

"""

349

Convert to COCO prediction format.

350

351

Returns:

352

CocoPrediction: COCO format prediction

353

"""

354

355

def to_fiftyone_detection(self):

356

"""

357

Convert to FiftyOne detection format.

358

359

Returns:

360

FiftyOne Detection object

361

"""

362

```

363

364

### PredictionScore

365

366

Wrapper for prediction confidence scores with comparison operations.

367

368

```python { .api }

369

class PredictionScore:

370

def __init__(self, value: Union[float, np.ndarray]):

371

"""

372

Initialize prediction score.

373

374

Parameters:

375

- value: Confidence score between 0 and 1

376

"""

377

378

@property

379

def value(self) -> float: ...

380

381

def is_greater_than_threshold(self, threshold: float) -> bool:

382

"""

383

Check if score exceeds threshold.

384

385

Parameters:

386

- threshold (float): Threshold value

387

388

Returns:

389

bool: True if score > threshold

390

"""

391

392

def __eq__(self, threshold: float) -> bool: ...

393

def __gt__(self, threshold: float) -> bool: ...

394

def __lt__(self, threshold: float) -> bool: ...

395

```

396

397

### PredictionResult

398

399

Container for prediction results with image data and export capabilities.

400

401

```python { .api }

402

class PredictionResult:

403

def __init__(

404

self,

405

object_prediction_list: List[ObjectPrediction],

406

image: Image.Image,

407

durations_in_seconds: Optional[Dict] = None,

408

):

409

"""

410

Initialize prediction result container.

411

412

Parameters:

413

- object_prediction_list: List of predictions

414

- image: Original PIL Image

415

- durations_in_seconds: Timing profiling data

416

"""

417

418

@property

419

def object_prediction_list(self) -> List[ObjectPrediction]: ...

420

421

@property

422

def image(self) -> Image.Image: ...

423

424

def export_visuals(self, export_dir: str, text_size: Optional[float] = None):

425

"""

426

Export visualization images to directory.

427

428

Parameters:

429

- export_dir (str): Output directory path

430

- text_size (float, optional): Text size for labels

431

"""

432

433

def to_coco_annotations(self) -> List["CocoAnnotation"]:

434

"""Convert predictions to COCO annotation list."""

435

436

def to_coco_predictions(self) -> List["CocoPrediction"]:

437

"""Convert to COCO prediction list."""

438

439

def to_imantics_annotations(self) -> List:

440

"""Convert to Imantics annotation list."""

441

442

def to_fiftyone_detections(self) -> List:

443

"""Convert to FiftyOne detection list."""

444

```

445

446

## Usage Examples

447

448

### Creating Annotations

449

450

```python

451

from sahi import BoundingBox, Category, Mask, ObjectAnnotation

452

import numpy as np

453

454

# Create bounding box

455

bbox = BoundingBox(box=[10, 20, 100, 80])

456

print(f"Area: {bbox.area}")

457

print(f"COCO format: {bbox.to_coco_bbox()}")

458

459

# Create category

460

category = Category(id=1, name="person")

461

462

# Create mask from boolean array

463

bool_mask = np.random.rand(100, 100) > 0.5

464

mask = Mask.from_bool_mask(bool_mask, full_shape=[100, 100])

465

466

# Create complete annotation

467

annotation = ObjectAnnotation(

468

bbox=bbox,

469

category=category,

470

mask=mask

471

)

472

```

473

474

### Coordinate Transformations

475

476

```python

477

# Create bbox with shift amount for coordinate mapping

478

bbox = BoundingBox(

479

box=[50, 60, 150, 160],

480

shift_amount=(100, 100)

481

)

482

483

# Get shifted coordinates

484

shifted_bbox = bbox.get_shifted_box()

485

print(f"Original: {bbox.to_xyxy()}")

486

print(f"Shifted: {shifted_bbox.to_xyxy()}")

487

488

# Expand bounding box

489

expanded = bbox.get_expanded_box(ratio=0.2) # 20% expansion

490

```

491

492

### Format Conversions

493

494

```python

495

from sahi.annotation import ObjectAnnotation

496

497

# Create from COCO format

498

coco_bbox = [10, 20, 50, 60] # [x, y, width, height]

499

annotation = ObjectAnnotation.from_coco_bbox(

500

bbox=coco_bbox,

501

category_id=1,

502

category_name="person"

503

)

504

505

# Convert to different formats

506

coco_annotation = annotation.to_coco_annotation()

507

shapely_annotation = annotation.to_shapely_annotation()

508

509

# Work with different coordinate systems

510

voc_bbox = annotation.bbox.to_voc_bbox() # [xmin, ymin, xmax, ymax]

511

xyxy_bbox = annotation.bbox.to_xyxy() # [xmin, ymin, xmax, ymax] as floats

512

```

513

514

### Working with Predictions

515

516

```python

517

from sahi.prediction import ObjectPrediction, PredictionScore

518

519

# Create prediction with confidence

520

prediction = ObjectPrediction(
    bbox=[10, 20, 100, 80],
    category_id=0,
    category_name="person",
    score=0.85
)

526

527

# Check confidence threshold

528

if prediction.score.is_greater_than_threshold(0.5):

529

print("High confidence detection")

530

531

# Convert to different output formats

532

coco_pred = prediction.to_coco_prediction()

533

fiftyone_det = prediction.to_fiftyone_detection()

534

```