# Core Data Structures

The core data classes standardize computer vision results across different frameworks, providing a unified interface for detections, classifications, and keypoints.
3
4
## Capabilities
5
6
### Detections
7
8
The primary data structure for object detection and segmentation results. Standardizes outputs from various models into a consistent format for downstream processing.
9
10
```python { .api }
11
@dataclass
12
class Detections:
13
"""
14
Standardizes detection/segmentation results from various models.
15
16
Attributes:
17
xyxy (np.ndarray): Bounding boxes in [x1, y1, x2, y2] format, shape (n, 4)
18
mask (np.ndarray | None): Segmentation masks, shape (n, H, W)
19
confidence (np.ndarray | None): Detection confidence scores, shape (n,)
20
class_id (np.ndarray | None): Class IDs for detections, shape (n,)
21
tracker_id (np.ndarray | None): Tracking IDs, shape (n,)
22
data (dict[str, np.ndarray | list]): Additional detection data
23
metadata (dict[str, Any]): Collection-level metadata
24
"""
25
xyxy: np.ndarray
26
mask: np.ndarray | None = None
27
confidence: np.ndarray | None = None
28
class_id: np.ndarray | None = None
29
tracker_id: np.ndarray | None = None
30
data: dict[str, np.ndarray | list] = field(default_factory=dict)
31
metadata: dict[str, Any] = field(default_factory=dict)
32
33
def __len__(self) -> int:
34
"""Returns the number of detections."""
35
36
def __iter__(self) -> Iterator[tuple[
37
np.ndarray, np.ndarray | None, float | None,
38
int | None, int | None, dict[str, np.ndarray | list]
39
]]:
40
"""Iterates over detections yielding (xyxy, mask, confidence, class_id, tracker_id, data)."""
41
42
@classmethod
43
def from_ultralytics(cls, ultralytics_results) -> "Detections":
44
"""Create from Ultralytics YOLO results (detection, segmentation, OBB)."""
45
46
@classmethod
47
def from_yolov5(cls, yolov5_results) -> "Detections":
48
"""Create from YOLOv5 results."""
49
50
@classmethod
51
def from_transformers(cls, transformers_results: dict, id2label: dict[int, str] | None = None) -> "Detections":
52
"""Create from HuggingFace Transformers results."""
53
54
@classmethod
55
def from_mmdetection(cls, mmdet_results) -> "Detections":
56
"""Create from MMDetection results."""
57
58
@classmethod
59
def from_tensorflow(cls, tensorflow_results: dict, resolution_wh: tuple) -> "Detections":
60
"""Create from TensorFlow Hub results."""
61
62
@classmethod
63
def from_detectron2(cls, detectron2_results) -> "Detections":
64
"""Create from Detectron2 results."""
65
66
@classmethod
67
def from_inference(cls, roboflow_results: dict) -> "Detections":
68
"""Create from Roboflow Inference API results."""
69
70
@classmethod
71
def from_detr(cls, detr_results) -> "Detections":
72
"""Create from DETR model results."""
73
74
@classmethod
75
def from_sam(cls, sam_results: list) -> "Detections":
76
"""Create from Segment Anything Model results."""
77
78
@classmethod
79
def from_yolo_nas(cls, yolo_nas_results) -> "Detections":
80
"""Create from YOLO-NAS results."""
81
82
@classmethod
83
def from_deepsparse(cls, deepsparse_results) -> "Detections":
84
"""Create from DeepSparse results."""
85
86
@classmethod
87
def empty(cls) -> "Detections":
88
"""Create empty Detections instance."""
89
90
def with_nms(self, threshold: float = 0.5, class_agnostic: bool = False) -> "Detections":
91
"""Apply Non-Maximum Suppression filtering."""
92
93
def with_nmm(self, threshold: float = 0.5, class_agnostic: bool = False) -> "Detections":
94
"""Apply Non-Maximum Merging."""
95
96
def get_anchors_coordinates(self, anchor: Position) -> np.ndarray:
97
"""Get anchor point coordinates for each detection."""
98
99
def clip_to_image(self, resolution_wh: tuple[int, int]) -> "Detections":
100
"""Clip detections to image boundaries."""
101
102
def pad(self, px: int) -> "Detections":
103
"""Add padding to bounding boxes."""
104
105
def scale(self, factor: float, center: tuple[float, float] | None = None) -> "Detections":
106
"""Scale detections by a factor."""
107
108
def shift(self, shift: np.ndarray) -> "Detections":
109
"""Shift detections by offset."""
110
111
def crop_image(self, image: np.ndarray) -> list[np.ndarray]:
112
"""Extract cropped regions from image."""
113
114
def filter(self, mask: np.ndarray, inplace: bool = False) -> "Detections":
115
"""Filter detections using boolean mask."""
116
117
@classmethod
def merge(cls, detections_list: list["Detections"]) -> "Detections":
118
"""Merge multiple Detections instances."""
119
120
def tracker_id_is_duplicate(self, tracker_id: int) -> bool:
121
"""Check if tracker ID appears multiple times."""
122
123
def is_equal(self, other: "Detections") -> bool:
124
"""Check equality with another Detections instance."""
125
```
126
127
#### Usage Example
128
129
```python
130
import supervision as sv
131
import cv2
132
from ultralytics import YOLO
133
134
# Load model and image
135
model = YOLO("yolov8n.pt")
136
image = cv2.imread("image.jpg")
137
138
# Get detections
139
results = model(image)[0]
140
detections = sv.Detections.from_ultralytics(results)
141
142
# Access detection data
143
print(f"Found {len(detections)} objects")
144
for xyxy, mask, confidence, class_id, tracker_id, data in detections:
145
print(f"Box: {xyxy}, Confidence: {confidence}, Class: {class_id}")
146
147
# Filter by confidence
148
high_conf = detections[detections.confidence > 0.5]
149
150
# Apply NMS
151
filtered = detections.with_nms(threshold=0.4)
152
```
153
154
### Classifications
155
156
Data structure for classification results from various models.
157
158
```python { .api }
159
@dataclass
160
class Classifications:
161
"""
162
Standardizes classification results.
163
164
Attributes:
165
class_id (np.ndarray): Class IDs, shape (n,)
166
confidence (np.ndarray | None): Classification confidence scores, shape (n,)
167
"""
168
class_id: np.ndarray
169
confidence: np.ndarray | None = None
170
171
def __len__(self) -> int:
172
"""Returns the number of classifications."""
173
174
@classmethod
175
def from_clip(cls, clip_results) -> "Classifications":
176
"""Create from CLIP model results."""
177
```
178
179
#### Usage Example
180
```python
import numpy as np
import supervision as sv

# Create classifications from raw results
class_ids = np.array([0, 1, 2])
confidences = np.array([0.95, 0.87, 0.92])
classifications = sv.Classifications(
    class_id=class_ids,
    confidence=confidences
)

print(f"Number of classifications: {len(classifications)}")
```
194
195
### KeyPoints
196
197
Data structure for keypoint detection results.
198
199
```python { .api }
200
class KeyPoints:
201
"""
202
Represents keypoint detection results for pose estimation and facial landmarks.
203
204
Handles keypoint coordinates, visibility, and confidence scores.
205
"""
206
207
@classmethod
208
def from_ultralytics(cls, ultralytics_results) -> "KeyPoints":
209
"""Create from Ultralytics pose estimation results."""
210
211
@classmethod
212
def from_mediapipe(cls, mediapipe_results) -> "KeyPoints":
213
"""Create from MediaPipe results."""
214
```
215
216
#### Usage Example
217
218
```python
219
import supervision as sv
220
from ultralytics import YOLO
221
222
# Load pose estimation model
223
model = YOLO("yolov8n-pose.pt")
224
image = cv2.imread("person.jpg")
225
226
# Get keypoints
227
results = model(image)[0]
228
keypoints = sv.KeyPoints.from_ultralytics(results)
229
```
230
231
## Types
232
233
```python { .api }
234
# Type aliases for common data structures
235
DetectionDataset = Any # Dataset containing detection annotations
236
ClassificationDataset = Any # Dataset containing classification labels
237
238
# Common numpy array shapes used throughout
239
BoundingBoxes = np.ndarray # Shape: (n, 4) - [x1, y1, x2, y2]
240
Masks = np.ndarray # Shape: (n, H, W) - boolean masks
241
Confidences = np.ndarray # Shape: (n,) - confidence scores
242
ClassIds = np.ndarray # Shape: (n,) - integer class identifiers
243
TrackerIds = np.ndarray # Shape: (n,) - integer tracker identifiers
244
```