0
# Coordinate and Data Conversion
1
2
Utilities for converting between bounding box coordinate formats, converting between masks, polygons, and bounding boxes, manipulating box coordinates, and encoding masks in dataset annotation formats such as COCO RLE.
3
4
## Capabilities
5
6
### Box Coordinate Conversion
7
8
Functions for converting between different bounding box coordinate formats.
9
10
```python { .api }
11
def xyxy_to_xywh(xyxy: np.ndarray) -> np.ndarray:
12
"""
13
Convert bounding boxes from (x1, y1, x2, y2) to (x, y, width, height) format.
14
15
Args:
16
xyxy (np.ndarray): Bounding boxes in [x1, y1, x2, y2] format, shape (n, 4)
17
18
Returns:
19
np.ndarray: Bounding boxes in [x, y, width, height] format, shape (n, 4)
20
"""
21
22
def xywh_to_xyxy(xywh: np.ndarray) -> np.ndarray:
23
"""
24
Convert bounding boxes from (x, y, width, height) to (x1, y1, x2, y2) format.
25
26
Args:
27
xywh (np.ndarray): Bounding boxes in [x, y, width, height] format, shape (n, 4)
28
29
Returns:
30
np.ndarray: Bounding boxes in [x1, y1, x2, y2] format, shape (n, 4)
31
"""
32
33
def xcycwh_to_xyxy(xcycwh: np.ndarray) -> np.ndarray:
34
"""
35
Convert bounding boxes from center format (x_center, y_center, width, height) to corner format (x1, y1, x2, y2).
36
37
Args:
38
xcycwh (np.ndarray): Boxes in [x_center, y_center, width, height] format
39
40
Returns:
41
np.ndarray: Boxes in [x1, y1, x2, y2] format
42
"""
43
44
def xyxy_to_xcycarh(xyxy: np.ndarray) -> np.ndarray:
45
"""
46
Convert bounding boxes from (x1, y1, x2, y2) to (x_center, y_center, aspect_ratio, height) format, where aspect_ratio is width / height.
47
48
Args:
49
xyxy (np.ndarray): Boxes in [x1, y1, x2, y2] format
50
51
Returns:
52
np.ndarray: Boxes in [x_center, y_center, aspect_ratio, height] format
53
"""
54
```
55
56
### Mask and Polygon Conversion
57
58
Functions for converting between masks, polygons, and bounding boxes.
59
60
```python { .api }
61
def mask_to_polygons(mask: np.ndarray) -> list[np.ndarray]:
62
"""
63
Convert segmentation mask to polygon representation.
64
65
Args:
66
mask (np.ndarray): Binary mask, shape (H, W)
67
68
Returns:
69
list[np.ndarray]: List of polygons, each as (n, 2) coordinate arrays
70
"""
71
72
def polygon_to_mask(polygon: np.ndarray, resolution_wh: tuple[int, int]) -> np.ndarray:
73
"""
74
Convert polygon to binary mask.
75
76
Args:
77
polygon (np.ndarray): Polygon coordinates, shape (n, 2)
78
resolution_wh (tuple[int, int]): Output mask size (width, height)
79
80
Returns:
81
np.ndarray: Binary mask, shape (H, W)
82
"""
83
84
def mask_to_xyxy(mask: np.ndarray) -> np.ndarray:
85
"""
86
Extract bounding boxes from segmentation masks.
87
88
Args:
89
mask (np.ndarray): Binary masks, shape (n, H, W)
90
91
Returns:
92
np.ndarray: Bounding boxes in [x1, y1, x2, y2] format, shape (n, 4)
93
"""
94
95
def polygon_to_xyxy(polygon: np.ndarray) -> np.ndarray:
96
"""
97
Extract bounding box from polygon coordinates.
98
99
Args:
100
polygon (np.ndarray): Polygon coordinates, shape (n, 2)
101
102
Returns:
103
np.ndarray: Bounding box [x1, y1, x2, y2]
104
"""
105
106
def xyxy_to_polygons(xyxy: np.ndarray) -> list[np.ndarray]:
107
"""
108
Convert bounding boxes to polygon representation.
109
110
Args:
111
xyxy (np.ndarray): Bounding boxes in [x1, y1, x2, y2] format
112
113
Returns:
114
list[np.ndarray]: List of polygon arrays, each shape (4, 2)
115
"""
116
```
117
118
### Box Manipulation
119
120
Utilities for manipulating bounding box coordinates.
121
122
```python { .api }
123
def clip_boxes(xyxy: np.ndarray, resolution_wh: tuple[int, int]) -> np.ndarray:
124
"""
125
Clip bounding boxes to image boundaries.
126
127
Args:
128
xyxy (np.ndarray): Bounding boxes in [x1, y1, x2, y2] format
129
resolution_wh (tuple[int, int]): Image dimensions (width, height)
130
131
Returns:
132
np.ndarray: Clipped bounding boxes
133
"""
134
135
def pad_boxes(xyxy: np.ndarray, px: int) -> np.ndarray:
136
"""
137
Add padding to bounding boxes.
138
139
Args:
140
xyxy (np.ndarray): Bounding boxes in [x1, y1, x2, y2] format
141
px (int): Padding in pixels
142
143
Returns:
144
np.ndarray: Padded bounding boxes
145
"""
146
147
def scale_boxes(xyxy: np.ndarray, factor: float) -> np.ndarray:
148
"""
149
Scale the width and height of bounding boxes by a factor, keeping each box's center fixed.
150
151
Args:
152
xyxy (np.ndarray): Bounding boxes in [x1, y1, x2, y2] format
153
factor (float): Scaling factor
154
155
Returns:
156
np.ndarray: Scaled bounding boxes
157
"""
158
159
def move_boxes(xyxy: np.ndarray, offset: np.ndarray) -> np.ndarray:
160
"""
161
Translate bounding boxes by offset.
162
163
Args:
164
xyxy (np.ndarray): Bounding boxes in [x1, y1, x2, y2] format
165
offset (np.ndarray): Translation offset [dx, dy]
166
167
Returns:
168
np.ndarray: Translated bounding boxes
169
"""
170
171
def denormalize_boxes(xyxy: np.ndarray, resolution_wh: tuple[int, int]) -> np.ndarray:
172
"""
173
Convert normalized coordinates [0-1] to absolute pixel coordinates.
174
175
Args:
176
xyxy (np.ndarray): Normalized bounding boxes
177
resolution_wh (tuple[int, int]): Image dimensions (width, height)
178
179
Returns:
180
np.ndarray: Absolute coordinate bounding boxes
181
"""
182
```
183
184
### Dataset Format Conversion
185
186
Functions for converting between different dataset annotation formats.
187
188
```python { .api }
189
def mask_to_rle(mask: np.ndarray) -> dict:
190
"""
191
Convert binary mask to Run Length Encoding (RLE) format.
192
193
Args:
194
mask (np.ndarray): Binary mask, shape (H, W)
195
196
Returns:
197
dict: RLE encoded mask in COCO format
198
"""
199
200
def rle_to_mask(rle: dict) -> np.ndarray:
201
"""
202
Convert RLE encoded mask back to binary mask.
203
204
Args:
205
rle (dict): RLE encoded mask in COCO format
206
207
Returns:
208
np.ndarray: Binary mask, shape (H, W)
209
"""
210
211
def get_coco_class_index_mapping() -> dict[str, int]:
212
"""
213
Get mapping from COCO class names to indices.
214
215
Returns:
216
dict[str, int]: Mapping from class names to indices
217
"""
218
```
219
220
## Usage Examples
221
222
### Basic Coordinate Conversion
223
224
```python
225
import supervision as sv
226
import numpy as np
227
228
# Convert from corner format (x1, y1, x2, y2) to top-left + size format (x, y, width, height)
229
xyxy_boxes = np.array([[100, 100, 200, 200], [300, 150, 400, 250]])
230
xywh_boxes = sv.xyxy_to_xywh(xyxy_boxes)
231
print(xywh_boxes) # [[100, 100, 100, 100], [300, 150, 100, 100]]
232
233
# Convert back to corner format
234
xyxy_converted = sv.xywh_to_xyxy(xywh_boxes)
235
```
236
237
### Mask to Polygon Conversion
238
239
```python
240
import supervision as sv
241
import cv2
242
243
# Load segmentation mask
244
mask = cv2.imread("mask.png", cv2.IMREAD_GRAYSCALE) > 0
245
246
# Convert to polygons
247
polygons = sv.mask_to_polygons(mask)
248
249
# Convert the first polygon back to a mask (a mask with several regions yields several polygons)
250
reconstructed_mask = sv.polygon_to_mask(
251
polygon=polygons[0],
252
resolution_wh=(mask.shape[1], mask.shape[0])
253
)
254
```
255
256
### Box Manipulation Pipeline
257
258
```python
259
import supervision as sv
260
import numpy as np
261
262
# Original boxes
263
boxes = np.array([[50, 50, 150, 150], [200, 100, 300, 200]])
264
265
# Apply transformations
266
padded_boxes = sv.pad_boxes(boxes, px=10)
267
scaled_boxes = sv.scale_boxes(padded_boxes, factor=1.2)
268
moved_boxes = sv.move_boxes(scaled_boxes, offset=np.array([20, 30]))
269
270
# Clip to image boundaries
271
image_size = (640, 480)
272
final_boxes = sv.clip_boxes(moved_boxes, resolution_wh=image_size)
273
```
274
275
### Dataset Format Conversion
276
277
```python
278
import supervision as sv
import numpy as np
279
280
# Convert mask to COCO RLE format
281
mask = np.zeros((480, 640), dtype=bool)
282
mask[100:200, 100:200] = True
283
284
rle = sv.mask_to_rle(mask)
285
print(rle) # {'size': [480, 640], 'counts': ...}
286
287
# Convert back to mask
288
reconstructed_mask = sv.rle_to_mask(rle)
289
```
290
291
## Types
292
293
```python { .api }
294
# Coordinate format type aliases
295
XYXYFormat = np.ndarray # Shape: (n, 4) - [x1, y1, x2, y2]
296
XYWHFormat = np.ndarray # Shape: (n, 4) - [x, y, width, height]
297
XCYCWHFormat = np.ndarray # Shape: (n, 4) - [x_center, y_center, width, height]
298
299
# Polygon and mask types
300
Polygon = np.ndarray # Shape: (n, 2) - [(x, y), ...]
301
PolygonList = list[np.ndarray] # List of polygon coordinate arrays
302
BinaryMask = np.ndarray # Shape: (H, W) - boolean mask
303
RLEMask = dict # Run Length Encoded mask in COCO format
304
305
# Image resolution type
306
Resolution = tuple[int, int] # (width, height)
307
```