Tessl Tile for pypi/supervision@0.26.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

annotators.md coordinate-conversion.md core-data-structures.md dataset-management.md detection-tools.md drawing-colors.md file-utilities.md index.md iou-nms.md keypoint-annotators.md metrics.md tracking.md video-processing.md vlm-support.md

docs/video-processing.md

0
# Video Processing
1

2
Utilities for processing video streams, handling image operations, and managing video input/output workflows. The module provides comprehensive tools for video analysis, frame extraction, and image manipulation.
3

4
## Capabilities
5

6
### Video Information and Metadata
7

8
Extract and manage video properties and metadata.
9

10
```python { .api }
11
@dataclass
12
class VideoInfo:
13
    """
14
    A class to store video information, including width, height, fps and total number of frames.
15

16
    Attributes:
17
        width (int): Width of the video in pixels
18
        height (int): Height of the video in pixels  
19
        fps (int): Frames per second of the video
20
        total_frames (int | None): Total number of frames in the video
21
    """
22
    width: int
23
    height: int
24
    fps: int
25
    total_frames: int | None = None
26

27
    @classmethod
28
    def from_video_path(cls, video_path: str) -> "VideoInfo":
29
        """Create VideoInfo from video file path."""
30

31
    @property
32
    def resolution_wh(self) -> tuple[int, int]:
33
        """Get video resolution as (width, height) tuple."""
34
```
35

36
### Video Output and Writing
37

38
Save processed video frames to files with flexible codec support.
39

40
```python { .api }
41
class VideoSink:
42
    """
43
    Context manager that saves video frames to a file using OpenCV.
44

45
    Attributes:
46
        target_path (str): The path to the output file where the video will be saved
47
        video_info (VideoInfo): Information about the video resolution, fps, and total frame count
48
        codec (str): FOURCC code for video format
49
    """
50
    def __init__(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v") -> None: ...
51

52
    def write_frame(self, frame: np.ndarray) -> None:
53
        """
54
        Writes a single video frame to the target video file.
55

56
        Args:
57
            frame: The video frame to be written to the file. Must be in BGR color format
58
        """
59

60
    def __enter__(self): ...
61
    def __exit__(self, exc_type, exc_value, exc_traceback): ...
62
```
63

64
### Video Frame Generation
65

66
Generate frames from video files with flexible control over playback.
67

68
```python { .api }
69
def get_video_frames_generator(
70
    source_path: str,
71
    stride: int = 1,
72
    start: int = 0,
73
    end: int | None = None,
74
    iterative_seek: bool = False
75
) -> Generator[np.ndarray]:
76
    """
77
    Get a generator that yields the frames of the video.
78

79
    Args:
80
        source_path: The path of the video file
81
        stride: Indicates the interval at which frames are returned, skipping stride - 1 frames between each
82
        start: Indicates the starting position from which video should generate frames
83
        end: Indicates the ending position at which video should stop generating frames. If None, video will be read to the end
84
        iterative_seek: If True, the generator will seek to the start frame by grabbing each frame, which is much slower
85

86
    Returns:
87
        A generator that yields the frames of the video
88

89
    Examples:
90
        ```python
91
        import supervision as sv
92

93
        for frame in sv.get_video_frames_generator(source_path="video.mp4"):
94
            # Process each frame
95
            pass
96
        ```
97
    """
98
```
99

100
### Video Processing Pipeline
101

102
Process entire videos with custom callback functions.
103

104
```python { .api }
105
def process_video(
106
    source_path: str,
107
    target_path: str,
108
    callback: Callable[[np.ndarray, int], np.ndarray],
109
    max_frames: int | None = None,
110
    show_progress: bool = False
111
) -> None:
112
    """
113
    Process video with callback function for each frame.
114

115
    Args:
116
        source_path: Path to the input video file
117
        target_path: Path to the output video file
118
        callback: Function that processes each frame. Takes (frame, frame_index) and returns processed frame
119
        max_frames: Maximum number of frames to process. If None, processes entire video
120
        show_progress: Whether to show a progress bar during processing
121

122
    Examples:
123
        ```python
124
        import supervision as sv
125

126
        def callback(scene: np.ndarray, index: int) -> np.ndarray:
127
            # Process frame here
128
            return annotated_scene
129

130
        sv.process_video(
131
            source_path="input.mp4",
132
            target_path="output.mp4", 
133
            callback=callback,
134
            show_progress=True
135
        )
136
        ```
137
    """
138
```
139

140
### Performance Monitoring
141

142
Track processing frame rate and performance metrics.
143

144
```python { .api }
145
class FPSMonitor:
146
    """
147
    Track processing frame rate and performance.
148
    """
149
    def __init__(self, window_size: int = 30) -> None: ...
150

151
    @property
152
    def fps(self) -> float:
153
        """Get current frames per second."""
154

155
    def tick(self) -> None:
156
        """Record a frame processing event."""
157

158
    def reset(self) -> None:
159
        """Reset the FPS monitor."""
160
```
161

162
### Image Processing
163

164
Core image manipulation functions for computer vision workflows.
165

166
```python { .api }
167
class ImageSink:
168
    """
169
    Save image sequences to files with automatic naming and organization.
170
    """
171
    def __init__(
172
        self,
173
        target_dir_path: str,
174
        overwrite: bool = False,
175
        image_name_pattern: str = "image_{:05d}.png"
176
    ) -> None: ...
177

178
    def save_image(self, image: np.ndarray, image_name: str | None = None) -> None:
179
        """Save a single image to the target directory."""
180

181
def crop_image(
182
    image: ImageType,
183
    xyxy: np.ndarray | list[int] | tuple[int, int, int, int]
184
) -> ImageType:
185
    """
186
    Crops the given image based on the given bounding box.
187

188
    Args:
189
        image: The image to be cropped
190
        xyxy: Bounding box coordinates in the format (x_min, y_min, x_max, y_max)
191

192
    Returns:
193
        The cropped image matching the input type
194

195
    Examples:
196
        ```python
197
        import cv2
198
        import supervision as sv
199

200
        image = cv2.imread("image.jpg")
201
        xyxy = [200, 400, 600, 800]
202
        cropped_image = sv.crop_image(image=image, xyxy=xyxy)
203
        ```
204
    """
205

206
def scale_image(image: ImageType, scale_factor: float) -> ImageType:
207
    """
208
    Scales the given image based on the given scale factor.
209

210
    Args:
211
        image: The image to be scaled
212
        scale_factor: The factor by which the image will be scaled
213

214
    Returns:
215
        The scaled image matching the input type
216
    """
217

218
def resize_image(
219
    image: ImageType,
220
    resolution_wh: tuple[int, int]
221
) -> ImageType:
222
    """
223
    Resize image to target resolution.
224

225
    Args:
226
        image: The image to be resized
227
        resolution_wh: Target resolution as (width, height)
228

229
    Returns:
230
        The resized image matching the input type
231
    """
232

233
def letterbox_image(
234
    image: ImageType,
235
    resolution_wh: tuple[int, int],
236
    color: tuple[int, int, int] = (114, 114, 114)
237
) -> ImageType:
238
    """
239
    Resize image with letterboxing (padding) to maintain aspect ratio.
240

241
    Args:
242
        image: The image to be letterboxed
243
        resolution_wh: Target resolution as (width, height)  
244
        color: RGB color for padding areas
245

246
    Returns:
247
        The letterboxed image matching the input type
248
    """
249

250
def overlay_image(
251
    background: ImageType,
252
    overlay: ImageType,
253
    anchor: Point,
254
    scale_factor: float = 1.0
255
) -> ImageType:
256
    """
257
    Overlay one image onto another at specified position.
258

259
    Args:
260
        background: The background image
261
        overlay: The image to overlay
262
        anchor: Position where to place the overlay
263
        scale_factor: Scale factor for the overlay image
264

265
    Returns:
266
        The combined image matching the background type
267
    """
268

269
def create_tiles(
270
    images: list[ImageType],
271
    grid_size: tuple[int, int] | None = None,
272
    single_tile_size: tuple[int, int] | None = None,
273
    titles: list[str] | None = None,
274
    titles_scale: float = 1.0,
275
    border_color: tuple[int, int, int] = (0, 0, 0),
276
    border_thickness: int = 5
277
) -> ImageType:
278
    """
279
    Create image tile grids for visualization.
280

281
    Args:
282
        images: List of images to combine into tiles
283
        grid_size: Grid dimensions as (columns, rows). If None, automatically determined
284
        single_tile_size: Size of each tile as (width, height). If None, uses original sizes
285
        titles: Optional list of titles for each image
286
        titles_scale: Scale factor for title text
287
        border_color: RGB color for borders between tiles
288
        border_thickness: Thickness of borders in pixels
289

290
    Returns:
291
        Combined image grid
292
    """
293
```
294

295
## Usage Examples
296

297
### Video Processing Pipeline
298

299
```python
300
import supervision as sv
301
import cv2
302

303
def process_frame(frame: np.ndarray, frame_index: int) -> np.ndarray:
304
    # Apply object detection
305
    detections = model(frame)
306
    
307
    # Annotate frame
308
    annotated_frame = annotator.annotate(frame, detections)
309
    
310
    return annotated_frame
311

312
# Process entire video
313
sv.process_video(
314
    source_path="input_video.mp4",
315
    target_path="output_video.mp4",
316
    callback=process_frame,
317
    show_progress=True
318
)
319
```
320

321
### Manual Video Processing with Performance Monitoring
322

323
```python
324
import supervision as sv
325

326
# Get video info
327
video_info = sv.VideoInfo.from_video_path("input.mp4")
328
fps_monitor = sv.FPSMonitor()
329

330
# Process frames manually
331
frames_generator = sv.get_video_frames_generator("input.mp4")
332

333
with sv.VideoSink("output.mp4", video_info) as sink:
334
    for frame in frames_generator:
335
        # Process frame
336
        processed_frame = your_processing_function(frame)
337
        
338
        # Write to output
339
        sink.write_frame(processed_frame)
340
        
341
        # Update FPS tracking
342
        fps_monitor.tick()
343
        
344
        print(f"Processing at {fps_monitor.fps:.1f} FPS")
345
```
346

347
### Image Manipulation Workflows
348

349
```python
350
import supervision as sv
351
import cv2
352

353
# Load images
354
image1 = cv2.imread("image1.jpg")
355
image2 = cv2.imread("image2.jpg")
356

357
# Crop regions of interest
358
bbox = [100, 100, 500, 400]
359
cropped = sv.crop_image(image1, bbox)
360

361
# Scale and resize operations  
362
scaled = sv.scale_image(image2, scale_factor=0.5)
363
resized = sv.resize_image(scaled, resolution_wh=(640, 480))
364

365
# Create comparison grid
366
images = [image1, cropped, scaled, resized]
367
titles = ["Original", "Cropped", "Scaled", "Resized"]
368

369
tile_grid = sv.create_tiles(
370
    images=images,
371
    titles=titles,
372
    grid_size=(2, 2),
373
    single_tile_size=(320, 240)
374
)
375

376
cv2.imshow("Image Processing Results", tile_grid)
377
cv2.waitKey(0)
378
```
379

380
### Video Frame Extraction
381

382
```python
383
import supervision as sv
384

385
# Extract every 10th frame from a video segment
386
frames = []
387
for i, frame in enumerate(sv.get_video_frames_generator(
388
    source_path="video.mp4",
389
    stride=10,      # Every 10th frame
390
    start=1000,     # Start at frame 1000
391
    end=2000        # End at frame 2000
392
)):
393
    frames.append(frame)
394
    if len(frames) >= 100:  # Limit to 100 frames
395
        break
396

397
print(f"Extracted {len(frames)} frames")
398

399
# Save frames using ImageSink
400
with sv.ImageSink("./extracted_frames/") as sink:
401
    for i, frame in enumerate(frames):
402
        sink.save_image(frame, f"frame_{i:05d}.jpg")
403
```

Version

Tile

Files

docs/video-processing.md

docs/video-processing.md