Tessl Tile for pypi/sahi@0.11.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

annotation-framework.md cli.md coco-integration.md image-slicing.md index.md model-integration.md postprocessing.md prediction-functions.md utilities.md

docs/utilities.md

0
# Utilities
1

2
SAHI provides comprehensive utility functions for computer vision operations, framework integrations, file I/O, and compatibility across different deep learning ecosystems. These utilities support the core functionality and provide additional convenience functions.
3

4
## Capabilities
5

6
### Computer Vision Utilities
7

8
Core computer vision operations including image reading, visualization, and format conversions.
9

10
```python { .api }
11
def read_image_as_pil(image_path: str) -> Image.Image:
12
    """
13
    Read image as PIL Image object.
14
    
15
    Parameters:
16
    - image_path (str): Path to image file
17
    
18
    Returns:
19
    Image.Image: PIL Image object
20
    """
21

22
def visualize_object_predictions(
23
    image: np.ndarray,
24
    object_prediction_list: List[ObjectPrediction],
25
    rect_th: int = 3,
26
    text_size: float = 3,
27
    text_th: float = 3,
28
    color: tuple = None,
29
    hide_labels: bool = False,
30
    hide_conf: bool = False,
31
    output_dir: Optional[str] = None,
32
    file_name: Optional[str] = "prediction_visual",
33
) -> np.ndarray:
34
    """
35
    Visualize object predictions on image with bounding boxes and labels.
36
    
37
    Parameters:
38
    - image (np.ndarray): Input image array
39
    - object_prediction_list: List of ObjectPrediction instances
40
    - rect_th (int): Rectangle thickness for bounding boxes
41
    - text_size (float): Text size for labels
42
    - text_th (float): Text thickness
43
    - color (tuple, optional): Custom color for all boxes (BGR format)
44
    - hide_labels (bool): Hide class labels
45
    - hide_conf (bool): Hide confidence scores  
46
    - output_dir (str, optional): Directory to save visualization
47
    - file_name (str): Name for saved visualization file
48
    
49
    Returns:
50
    np.ndarray: Visualized image with annotations
51
    """
52

53
def crop_object_predictions(
54
    image: np.ndarray,
55
    object_prediction_list: List[ObjectPrediction],
56
    output_dir: str,
57
    file_name: str = "prediction_visual",
58
    export_format: str = "deepcrop",
59
) -> Dict:
60
    """
61
    Crop detected objects from image and save individually.
62
    
63
    Parameters:
64
    - image (np.ndarray): Source image array
65
    - object_prediction_list: List of ObjectPrediction instances
66
    - output_dir (str): Directory for saving cropped images
67
    - file_name (str): Base name for cropped files
68
    - export_format (str): Export format ("deepcrop", "crop")
69
    
70
    Returns:
71
    Dict: Dictionary with crop information and file paths
72
    """
73
```
74

75
### Image Format and Conversion Utilities
76

77
```python { .api }
78
def get_coco_segmentation_from_bool_mask(bool_mask: np.ndarray) -> List[List[float]]:
79
    """
80
    Convert boolean mask to COCO polygon segmentation format.
81
    
82
    Parameters:
83
    - bool_mask (np.ndarray): Boolean mask array
84
    
85
    Returns:
86
    List[List[float]]: COCO format polygon coordinates
87
    """
88

89
def get_bool_mask_from_coco_segmentation(
90
    segmentation: List, 
91
    height: int, 
92
    width: int
93
) -> np.ndarray:
94
    """
95
    Convert COCO segmentation to boolean mask.
96
    
97
    Parameters:
98
    - segmentation (List): COCO format polygon segmentation
99
    - height (int): Mask height
100
    - width (int): Mask width
101
    
102
    Returns:
103
    np.ndarray: Boolean mask array
104
    """
105

106
def get_bbox_from_coco_segmentation(segmentation: List) -> List[int]:
107
    """
108
    Extract bounding box from COCO segmentation.
109
    
110
    Parameters:
111
    - segmentation (List): COCO format polygon segmentation
112
    
113
    Returns:
114
    List[int]: Bounding box [xmin, ymin, width, height]
115
    """
116
```
117

118
### Color Management
119

120
```python { .api }
121
class Colors:
122
    """
123
    Color palette for consistent visualization across different plots and frameworks.
124
    Provides color management for bounding boxes, labels, and visualization elements.
125
    """
126
    
127
    def __init__(self):
128
        """Initialize color palette with predefined colors."""
129
        
130
    def __call__(self, i: int, bgr: bool = False) -> Tuple[int, ...]:
131
        """
132
        Get color for given index.
133
        
134
        Parameters:
135
        - i (int): Color index
136
        - bgr (bool): Return BGR format instead of RGB
137
        
138
        Returns:
139
        Tuple[int, ...]: Color tuple (RGB or BGR)
140
        """
141
```
142

143
### File I/O Utilities
144

145
Comprehensive file operations supporting multiple formats and efficient data handling.
146

147
```python { .api }
148
def save_json(data: Dict, save_path: str):
149
    """
150
    Save data as JSON file with proper formatting.
151
    
152
    Parameters:
153
    - data (Dict): Data to save
154
    - save_path (str): Output file path
155
    """
156

157
def load_json(load_path: str) -> Dict:
158
    """
159
    Load JSON file as dictionary.
160
    
161
    Parameters:
162
    - load_path (str): Path to JSON file
163
    
164
    Returns:
165
    Dict: Loaded data
166
    """
167

168
def save_pickle(data: Any, save_path: str):
169
    """
170
    Save data as pickle file for efficient storage.
171
    
172
    Parameters:
173
    - data (Any): Data to save
174
    - save_path (str): Output file path
175
    """
176

177
def load_pickle(load_path: str) -> Any:
178
    """
179
    Load pickle file.
180
    
181
    Parameters:
182
    - load_path (str): Path to pickle file
183
    
184
    Returns:
185
    Any: Loaded data
186
    """
187

188
def list_files(
189
    directory: str,
190
    contains: Optional[List[str]] = None,
191
    extensions: Optional[List[str]] = None,
192
    recursive: bool = True,
193
) -> List[str]:
194
    """
195
    List files in directory with filtering options.
196
    
197
    Parameters:
198
    - directory (str): Directory to search
199
    - contains (List[str], optional): Substrings that filenames must contain
200
    - extensions (List[str], optional): File extensions to include
201
    - recursive (bool): Search subdirectories recursively
202
    
203
    Returns:
204
    List[str]: List of matching file paths
205
    """
206

207
def download_from_url(url: str, save_path: str):
208
    """
209
    Download file from URL.
210
    
211
    Parameters:
212
    - url (str): URL to download from
213
    - save_path (str): Local path to save file
214
    """
215

216
def import_model_class(model_class_name: str, model_type: str):
217
    """
218
    Dynamically import model class based on type.
219
    
220
    Parameters:
221
    - model_class_name (str): Name of model class to import
222
    - model_type (str): Model framework type
223
    
224
    Returns:
225
    Type: Imported model class
226
    """
227
```
228

229
### PyTorch Utilities
230

231
Utilities for PyTorch tensor operations and device management.
232

233
```python { .api }
234
def empty_cuda_cache():
235
    """Clear CUDA memory cache to free up GPU memory."""
236

237
def to_float_tensor(image: Union[np.ndarray, Image.Image]) -> torch.Tensor:
238
    """
239
    Convert image to PyTorch float tensor.
240
    
241
    Parameters:
242
    - image: Input image (numpy array or PIL Image)
243
    
244
    Returns:
245
    torch.Tensor: Float tensor in CHW format
246
    """
247

248
def torch_to_numpy(tensor: torch.Tensor) -> np.ndarray:
249
    """
250
    Convert PyTorch tensor to numpy array.
251
    
252
    Parameters:
253
    - tensor (torch.Tensor): Input tensor
254
    
255
    Returns:
256
    np.ndarray: Numpy array
257
    """
258

259
def select_device(device: Optional[str] = None) -> torch.device:
260
    """
261
    Select appropriate PyTorch device for inference.
262
    
263
    Parameters:
264
    - device (str, optional): Device specification ("cpu", "cuda", "mps", etc.)
265
    
266
    Returns:
267
    torch.device: Selected PyTorch device
268
    """
269
```
270

271
### Import and Environment Utilities
272

273
Utilities for checking dependencies and managing package imports.
274

275
```python { .api }
276
def is_available(package: str) -> bool:
277
    """
278
    Check if package is available for import.
279
    
280
    Parameters:
281
    - package (str): Package name to check
282
    
283
    Returns:
284
    bool: True if package is available
285
    """
286

287
def check_requirements(
288
    requirements: List[str], 
289
    raise_exception: bool = True
290
):
291
    """
292
    Verify that required packages are installed.
293
    
294
    Parameters:
295
    - requirements (List[str]): List of required package names
296
    - raise_exception (bool): Whether to raise exception if packages missing
297
    
298
    Raises:
299
    ImportError: If required packages are missing and raise_exception=True
300
    """
301

302
def get_package_info(package_name: str) -> Dict[str, str]:
303
    """
304
    Get information about installed package.
305
    
306
    Parameters:
307
    - package_name (str): Name of package to query
308
    
309
    Returns:
310
    Dict[str, str]: Package information (version, location, etc.)
311
    """
312

313
def print_environment_info():
314
    """
315
    Print comprehensive environment and dependency information.
316
    Includes Python version, PyTorch version, CUDA availability, 
317
    system information, and installed package versions.
318
    """
319
```
320

321
### Framework-Specific Utilities
322

323
Utilities for specific deep learning framework integrations.
324

325
```python { .api }
326
# Detectron2 utilities
327
def convert_detectron2_bbox_format(bbox: List) -> List:
328
    """Convert Detectron2 bbox format to standard format."""
329

330
def convert_detectron2_mask_format(mask: np.ndarray) -> np.ndarray:  
331
    """Convert Detectron2 mask format to standard format."""
332

333
# MMDetection utilities  
334
def convert_mmdet_bbox_format(bbox: List) -> List:
335
    """Convert MMDetection bbox format to standard format."""
336

337
def convert_mmdet_mask_format(mask: np.ndarray) -> np.ndarray:
338
    """Convert MMDetection mask format to standard format."""
339

340
# TorchVision utilities
341
def convert_torchvision_bbox_format(bbox: torch.Tensor) -> List:
342
    """Convert TorchVision bbox format to standard format."""
343

344
# RT-DETR utilities
345
def convert_rtdetr_output_format(outputs: Dict) -> List:
346
    """Convert RT-DETR output format to standard ObjectPrediction format."""
347
```
348

349
### File Path and Video Utilities
350

351
```python { .api }
352
class Path:
353
    """Enhanced path handling with additional convenience methods."""
354
    
355
    def __init__(self, path: str):
356
        """Initialize path handler."""
357
    
358
    @property
359
    def suffix(self) -> str:
360
        """Get file extension."""
361
    
362
    @property
363
    def stem(self) -> str:
364
        """Get filename without extension."""
365

366
def increment_path(path: str, exist_ok: bool = False) -> str:
367
    """
368
    Increment file path to avoid overwrites.
369
    
370
    Parameters:
371
    - path (str): Original path
372
    - exist_ok (bool): Whether existing path is acceptable
373
    
374
    Returns:
375
    str: Incremented path (e.g., "file_1.txt", "file_2.txt")
376
    """
377

378
def get_video_reader(video_path: str):
379
    """
380
    Get video reader object for frame-by-frame processing.
381
    
382
    Parameters:
383
    - video_path (str): Path to video file
384
    
385
    Returns:
386
    Video reader object
387
    """
388
```
389

390
### Constants
391

392
```python { .api }
393
# Supported file extensions
394
IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.tiff', '.bmp', '.gif']
395
VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.wmv']
396
IMAGE_EXTENSIONS_LOSSLESS = ['.png', '.tiff', '.bmp']
397
IMAGE_EXTENSIONS_LOSSY = ['.jpg', '.jpeg']
398
```
399

400
## Usage Examples
401

402
### Image Processing and Visualization
403

404
```python
405
from sahi.utils.cv import read_image_as_pil, visualize_object_predictions
406
from sahi import get_sliced_prediction
407

408
# Read image
409
image = read_image_as_pil("input_image.jpg")
410

411
# Get predictions
412
result = get_sliced_prediction(
413
    image="input_image.jpg",
414
    detection_model=model
415
)
416

417
# Visualize predictions
418
visualized = visualize_object_predictions(
419
    image=np.array(image),
420
    object_prediction_list=result.object_prediction_list,
421
    rect_th=3,
422
    text_size=1.0,
423
    hide_conf=False,
424
    output_dir="visualizations/",
425
    file_name="result"
426
)
427
```
428

429
### File Operations
430

431
```python
432
from sahi.utils.file import save_json, load_json, list_files
433

434
# Save prediction results
435
predictions_data = {
436
    "predictions": [pred.json for pred in result.object_prediction_list],
437
    "metadata": {"model": "yolov8n", "confidence": 0.25}
438
}
439
save_json(predictions_data, "predictions.json")
440

441
# Load data
442
loaded_data = load_json("predictions.json")
443

444
# List image files
445
image_files = list_files(
446
    directory="dataset/",
447
    extensions=[".jpg", ".png"],
448
    contains=["train", "val"],
449
    recursive=True
450
)
451
print(f"Found {len(image_files)} image files")
452
```
453

454
### Environment and Dependency Management
455

456
```python
457
from sahi.utils.import_utils import is_available, check_requirements, print_environment_info
458

459
# Check if optional dependencies are available
460
if is_available("fiftyone"):
461
    print("FiftyOne integration available")
462

463
if is_available("mmdet"):
464
    print("MMDetection integration available")
465

466
# Verify required packages
467
try:
468
    check_requirements(["torch", "torchvision", "ultralytics"])
469
    print("All requirements satisfied")
470
except ImportError as e:
471
    print(f"Missing requirements: {e}")
472

473
# Print full environment info
474
print_environment_info()
475
```
476

477
### PyTorch Utilities
478

479
```python
480
from sahi.utils.torch_utils import select_device, empty_cuda_cache, to_float_tensor
481
import numpy as np
482

483
# Select optimal device
484
device = select_device("cuda")
485
print(f"Using device: {device}")
486

487
# Convert image to tensor
488
image_array = np.random.randint(0, 256, size=(480, 640, 3), dtype=np.uint8)
489
tensor = to_float_tensor(image_array)
490
print(f"Tensor shape: {tensor.shape}")
491

492
# Clear CUDA cache after processing
493
empty_cuda_cache()
494
```
495

496
### Custom Colors for Visualization
497

498
```python
499
from sahi.utils.cv import Colors, visualize_object_predictions
500

501
# Initialize color palette
502
colors = Colors()
503

504
# Get specific colors
505
red = colors(0)      # First color in palette
506
blue = colors(1)     # Second color  
507
green = colors(2)    # Third color
508

509
# Use custom color for visualization
510
visualized = visualize_object_predictions(
511
    image=image_array,
512
    object_prediction_list=predictions,
513
    color=(0, 255, 0),  # Custom green color
514
    rect_th=2,
515
    text_size=0.8
516
)
517
```
518

519
### File Path Management
520

521
```python
522
from sahi.utils.file import increment_path, Path
523

524
# Avoid overwriting existing files
525
output_path = increment_path("results/experiment.json")
526
print(f"Using path: {output_path}")  # e.g., "results/experiment_1.json"
527

528
# Enhanced path handling
529
path = Path("dataset/images/sample.jpg")
530
print(f"Extension: {path.suffix}")  # ".jpg"
531
print(f"Filename: {path.stem}")     # "sample"
532
```
533

534
### Format Conversions
535

536
```python
537
from sahi.utils.cv import (
538
    get_coco_segmentation_from_bool_mask,
539
    get_bool_mask_from_coco_segmentation,
540
    get_bbox_from_coco_segmentation
541
)
542

543
# Create boolean mask
544
bool_mask = np.random.rand(100, 100) > 0.5
545

546
# Convert to COCO format
547
coco_segmentation = get_coco_segmentation_from_bool_mask(bool_mask)
548

549
# Convert back to boolean mask
550
reconstructed_mask = get_bool_mask_from_coco_segmentation(
551
    coco_segmentation, 100, 100
552
)
553

554
# Extract bounding box from segmentation
555
bbox = get_bbox_from_coco_segmentation(coco_segmentation)
556
print(f"Bounding box: {bbox}")
557
```

Version

Tile

Files

docs/utilities.md

docs/utilities.md