or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

annotators.md coordinate-conversion.md core-data-structures.md dataset-management.md detection-tools.md drawing-colors.md file-utilities.md index.md iou-nms.md keypoint-annotators.md metrics.md tracking.md video-processing.md vlm-support.md

docs/video-processing.md

0

# Video Processing

1

2

Utilities for processing video streams, handling image operations, and managing video input/output workflows. Provides comprehensive tools for video analysis, frame extraction, and image manipulation.

3

4

## Capabilities

5

6

### Video Information and Metadata

7

8

Extract and manage video properties and metadata.

9

10

```python { .api }

11

@dataclass

12

class VideoInfo:

13

"""

14

A class to store video information, including width, height, fps and total number of frames.

15

16

Attributes:

17

width (int): Width of the video in pixels

18

height (int): Height of the video in pixels

19

fps (int): Frames per second of the video

20

total_frames (int | None): Total number of frames in the video

21

"""

22

width: int

23

height: int

24

fps: int

25

total_frames: int | None = None

26

27

@classmethod

28

def from_video_path(cls, video_path: str) -> "VideoInfo":

29

"""Create VideoInfo from video file path."""

30

31

@property

32

def resolution_wh(self) -> tuple[int, int]:

33

"""Get video resolution as (width, height) tuple."""

34

```

35

36

### Video Output and Writing

37

38

Save processed video frames to files with flexible codec support.

39

40

```python { .api }

41

class VideoSink:

42

"""

43

Context manager that saves video frames to a file using OpenCV.

44

45

Attributes:

46

target_path (str): The path to the output file where the video will be saved

47

video_info (VideoInfo): Information about the video resolution, fps, and total frame count

48

codec (str): FOURCC code for video format

49

"""

50

def __init__(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v") -> None: ...

51

52

def write_frame(self, frame: np.ndarray) -> None:

53

"""

54

Writes a single video frame to the target video file.

55

56

Args:

57

frame: The video frame to be written to the file. Must be in BGR color format

58

"""

59

60

def __enter__(self): ...

61

def __exit__(self, exc_type, exc_value, exc_traceback): ...

62

```

63

64

### Video Frame Generation

65

66

Generate frames from video files with flexible control over playback.

67

68

```python { .api }

69

def get_video_frames_generator(

70

source_path: str,

71

stride: int = 1,

72

start: int = 0,

73

end: int | None = None,

74

iterative_seek: bool = False

75

) -> Generator[np.ndarray]:

76

"""

77

Get a generator that yields the frames of the video.

78

79

Args:

80

source_path: The path of the video file

81

stride: Indicates the interval at which frames are returned, skipping stride - 1 frames between each

82

start: Indicates the starting position from which video should generate frames

83

end: Indicates the ending position at which video should stop generating frames. If None, video will be read to the end

84

iterative_seek: If True, the generator will seek to the start frame by grabbing each frame, which is much slower

85

86

Returns:

87

A generator that yields the frames of the video

88

89

Examples:

90

```python

91

import supervision as sv

92

93

for frame in sv.get_video_frames_generator(source_path="video.mp4"):

94

# Process each frame

95

pass

96

```

97

"""

98

```

99

100

### Video Processing Pipeline

101

102

Process entire videos with custom callback functions.

103

104

```python { .api }

105

def process_video(

106

source_path: str,

107

target_path: str,

108

callback: Callable[[np.ndarray, int], np.ndarray],

109

max_frames: int | None = None,

110

show_progress: bool = False

111

) -> None:

112

"""

113

Process video with callback function for each frame.

114

115

Args:

116

source_path: Path to the input video file

117

target_path: Path to the output video file

118

callback: Function that processes each frame. Takes (frame, frame_index) and returns processed frame

119

max_frames: Maximum number of frames to process. If None, processes entire video

120

show_progress: Whether to show a progress bar during processing

121

122

Examples:

123

```python

124

import supervision as sv

125

126

def callback(scene: np.ndarray, index: int) -> np.ndarray:

127

# Process frame here

128

return annotated_scene

129

130

sv.process_video(

131

source_path="input.mp4",

132

target_path="output.mp4",

133

callback=callback,

134

show_progress=True

135

)

136

```

137

"""

138

```

139

140

### Performance Monitoring

141

142

Track processing frame rate and performance metrics.

143

144

```python { .api }

145

class FPSMonitor:

146

"""

147

Track processing frame rate and performance.

148

"""

149

def __init__(self, window_size: int = 30) -> None: ...

150

151

@property

152

def fps(self) -> float:

153

"""Get current frames per second."""

154

155

def tick(self) -> None:

156

"""Record a frame processing event."""

157

158

def reset(self) -> None:

159

"""Reset the FPS monitor."""

160

```

161

162

### Image Processing

163

164

Core image manipulation functions for computer vision workflows.

165

166

```python { .api }

167

class ImageSink:

168

"""

169

Save image sequences to files with automatic naming and organization.

170

"""

171

def __init__(

172

self,

173

target_dir_path: str,

174

overwrite: bool = False,

175

image_name_pattern: str = "image_{:05d}.png"

176

) -> None: ...

177

178

def save_image(self, image: np.ndarray, image_name: str | None = None) -> None:

179

"""Save a single image to the target directory."""

180

181

def crop_image(

182

image: ImageType,

183

xyxy: np.ndarray | list[int] | tuple[int, int, int, int]

184

) -> ImageType:

185

"""

186

Crops the given image based on the given bounding box.

187

188

Args:

189

image: The image to be cropped

190

xyxy: Bounding box coordinates in the format (x_min, y_min, x_max, y_max)

191

192

Returns:

193

The cropped image matching the input type

194

195

Examples:

196

```python

197

import cv2

198

import supervision as sv

199

200

image = cv2.imread("image.jpg")

201

xyxy = [200, 400, 600, 800]

202

cropped_image = sv.crop_image(image=image, xyxy=xyxy)

203

```

204

"""

205

206

def scale_image(image: ImageType, scale_factor: float) -> ImageType:

207

"""

208

Scales the given image based on the given scale factor.

209

210

Args:

211

image: The image to be scaled

212

scale_factor: The factor by which the image will be scaled

213

214

Returns:

215

The scaled image matching the input type

216

"""

217

218

def resize_image(

219

image: ImageType,

220

resolution_wh: tuple[int, int]

221

) -> ImageType:

222

"""

223

Resize image to target resolution.

224

225

Args:

226

image: The image to be resized

227

resolution_wh: Target resolution as (width, height)

228

229

Returns:

230

The resized image matching the input type

231

"""

232

233

def letterbox_image(

234

image: ImageType,

235

resolution_wh: tuple[int, int],

236

color: tuple[int, int, int] = (114, 114, 114)

237

) -> ImageType:

238

"""

239

Resize image with letterboxing (padding) to maintain aspect ratio.

240

241

Args:

242

image: The image to be letterboxed

243

resolution_wh: Target resolution as (width, height)

244

color: RGB color for padding areas

245

246

Returns:

247

The letterboxed image matching the input type

248

"""

249

250

def overlay_image(

251

background: ImageType,

252

overlay: ImageType,

253

anchor: Point,

254

scale_factor: float = 1.0

255

) -> ImageType:

256

"""

257

Overlay one image onto another at specified position.

258

259

Args:

260

background: The background image

261

overlay: The image to overlay

262

anchor: Position where to place the overlay

263

scale_factor: Scale factor for the overlay image

264

265

Returns:

266

The combined image matching the background type

267

"""

268

269

def create_tiles(

270

images: list[ImageType],

271

grid_size: tuple[int, int] | None = None,

272

single_tile_size: tuple[int, int] | None = None,

273

titles: list[str] | None = None,

274

titles_scale: float = 1.0,

275

border_color: tuple[int, int, int] = (0, 0, 0),

276

border_thickness: int = 5

277

) -> ImageType:

278

"""

279

Create image tile grids for visualization.

280

281

Args:

282

images: List of images to combine into tiles

283

grid_size: Grid dimensions as (columns, rows). If None, automatically determined

284

single_tile_size: Size of each tile as (width, height). If None, uses original sizes

285

titles: Optional list of titles for each image

286

titles_scale: Scale factor for title text

287

border_color: RGB color for borders between tiles

288

border_thickness: Thickness of borders in pixels

289

290

Returns:

291

Combined image grid

292

"""

293

```

294

295

## Usage Examples

296

297

### Video Processing Pipeline

298

299

```python

300

import supervision as sv

301

import cv2

302

303

def process_frame(frame: np.ndarray, frame_index: int) -> np.ndarray:

304

# Apply object detection

305

detections = model(frame)

306

307

# Annotate frame

308

annotated_frame = annotator.annotate(frame, detections)

309

310

return annotated_frame

311

312

# Process entire video

313

sv.process_video(

314

source_path="input_video.mp4",

315

target_path="output_video.mp4",

316

callback=process_frame,

317

show_progress=True

318

)

319

```

320

321

### Manual Video Processing with Performance Monitoring

322

323

```python

324

import supervision as sv

325

326

# Get video info

327

video_info = sv.VideoInfo.from_video_path("input.mp4")

328

fps_monitor = sv.FPSMonitor()

329

330

# Process frames manually

331

frames_generator = sv.get_video_frames_generator("input.mp4")

332

333

with sv.VideoSink("output.mp4", video_info) as sink:

334

for frame in frames_generator:

335

# Process frame

336

processed_frame = your_processing_function(frame)

337

338

# Write to output

339

sink.write_frame(processed_frame)

340

341

# Update FPS tracking

342

fps_monitor.tick()

343

344

print(f"Processing at {fps_monitor.fps:.1f} FPS")

345

```

346

347

### Image Manipulation Workflows

348

349

```python

350

import supervision as sv

351

import cv2

352

353

# Load images

354

image1 = cv2.imread("image1.jpg")

355

image2 = cv2.imread("image2.jpg")

356

357

# Crop regions of interest

358

bbox = [100, 100, 500, 400]

359

cropped = sv.crop_image(image1, bbox)

360

361

# Scale and resize operations

362

scaled = sv.scale_image(image2, scale_factor=0.5)

363

resized = sv.resize_image(scaled, resolution_wh=(640, 480))

364

365

# Create comparison grid

366

images = [image1, cropped, scaled, resized]

367

titles = ["Original", "Cropped", "Scaled", "Resized"]

368

369

tile_grid = sv.create_tiles(

370

images=images,

371

titles=titles,

372

grid_size=(2, 2),

373

single_tile_size=(320, 240)

374

)

375

376

cv2.imshow("Image Processing Results", tile_grid)

377

cv2.waitKey(0)

378

```

379

380

### Video Frame Extraction

381

382

```python

383

import supervision as sv

384

385

# Extract every 10th frame from a video segment

386

frames = []

387

for i, frame in enumerate(sv.get_video_frames_generator(

388

source_path="video.mp4",

389

stride=10, # Every 10th frame

390

start=1000, # Start at frame 1000

391

end=2000 # End at frame 2000

392

)):

393

frames.append(frame)

394

if len(frames) >= 100: # Limit to 100 frames

395

break

396

397

print(f"Extracted {len(frames)} frames")

398

399

# Save frames using ImageSink

400

with sv.ImageSink("./extracted_frames/") as sink:

401

for i, frame in enumerate(frames):

402

sink.save_image(frame, f"frame_{i:05d}.jpg")

403

```