or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

datasets.md index.md io.md models.md ops.md transforms.md tv_tensors.md utils.md

io.mddocs/

0

# I/O Operations

1

2

The TorchVision I/O module provides efficient image and video reading, writing, and processing capabilities, with support for multiple formats and backends. It offers both high-level convenience functions and low-level streaming interfaces for a variety of multimedia formats.

3

4

## Capabilities

5

6

### Image I/O

7

8

#### Image Reading Functions

9

10

Functions for reading various image formats into tensors.

11

12

```python { .api }

13

def read_image(path: str, mode: str = 'RGB') -> torch.Tensor:

14

"""

15

Read image file and return as tensor.

16

17

Args:

18

path (str): Path to image file

19

mode (str): Image mode ('RGB', 'GRAY', 'UNCHANGED')

20

- RGB: Convert to 3-channel RGB

21

- GRAY: Convert to 1-channel grayscale

22

- UNCHANGED: Keep original format

23

24

Returns:

25

torch.Tensor: Image tensor of shape (C, H, W) with values in [0, 255]

26

"""

27

28

def decode_image(input: torch.Tensor, mode: str = 'RGB') -> torch.Tensor:

29

"""

30

Decode image from bytes tensor.

31

32

Args:

33

input (torch.Tensor): 1-D tensor containing encoded image bytes

34

mode (str): Image mode for decoding

35

36

Returns:

37

torch.Tensor: Decoded image tensor

38

"""

39

40

def decode_jpeg(input: torch.Tensor, mode: str = 'RGB', device: str = 'cpu') -> torch.Tensor:

41

"""

42

Decode JPEG image from bytes.

43

44

Args:

45

input (torch.Tensor): 1-D tensor containing JPEG bytes

46

mode (str): Image mode ('RGB', 'GRAY', 'UNCHANGED')

47

device (str): Device to place output tensor ('cpu' or 'cuda')

48

49

Returns:

50

torch.Tensor: Decoded JPEG image tensor

51

"""

52

53

def decode_png(input: torch.Tensor, mode: str = 'RGB') -> torch.Tensor:

54

"""

55

Decode PNG image from bytes.

56

57

Args:

58

input (torch.Tensor): 1-D tensor containing PNG bytes

59

mode (str): Image mode for decoding

60

61

Returns:

62

torch.Tensor: Decoded PNG image tensor

63

"""

64

65

def decode_gif(input: torch.Tensor) -> torch.Tensor:

66

"""

67

Decode GIF image from bytes.

68

69

Args:

70

input (torch.Tensor): 1-D tensor containing GIF bytes

71

72

Returns:

73

torch.Tensor: Decoded GIF frames tensor of shape (N, C, H, W)

74

"""

75

76

def decode_webp(input: torch.Tensor, mode: str = 'RGB') -> torch.Tensor:

77

"""

78

Decode WebP image from bytes.

79

80

Args:

81

input (torch.Tensor): 1-D tensor containing WebP bytes

82

mode (str): Image mode for decoding

83

84

Returns:

85

torch.Tensor: Decoded WebP image tensor

86

"""

87

88

def decode_avif(input: torch.Tensor, mode: str = 'RGB') -> torch.Tensor:

89

"""

90

Decode AVIF image from bytes.

91

92

Args:

93

input (torch.Tensor): 1-D tensor containing AVIF bytes

94

mode (str): Image mode for decoding

95

96

Returns:

97

torch.Tensor: Decoded AVIF image tensor

98

"""

99

100

def decode_heic(input: torch.Tensor, mode: str = 'RGB') -> torch.Tensor:

101

"""

102

Decode HEIC image from bytes.

103

104

Args:

105

input (torch.Tensor): 1-D tensor containing HEIC bytes

106

mode (str): Image mode for decoding

107

108

Returns:

109

torch.Tensor: Decoded HEIC image tensor

110

"""

111

```

112

113

#### Image Writing Functions

114

115

Functions for encoding and writing tensors as image files.

116

117

```python { .api }

118

def write_jpeg(input: torch.Tensor, filename: str, quality: int = 75) -> None:

119

"""

120

Write tensor as JPEG file.

121

122

Args:

123

input (torch.Tensor): Image tensor of shape (C, H, W) with values in [0, 255]

124

filename (str): Output file path

125

quality (int): JPEG quality (1-100, higher is better quality)

126

"""

127

128

def write_png(input: torch.Tensor, filename: str, compression_level: int = 6) -> None:

129

"""

130

Write tensor as PNG file.

131

132

Args:

133

input (torch.Tensor): Image tensor of shape (C, H, W) with values in [0, 255]

134

filename (str): Output file path

135

compression_level (int): PNG compression level (0-9, higher is smaller file)

136

"""

137

138

def encode_jpeg(input: torch.Tensor, quality: int = 75) -> torch.Tensor:

139

"""

140

Encode tensor to JPEG bytes.

141

142

Args:

143

input (torch.Tensor): Image tensor of shape (C, H, W) with values in [0, 255]

144

quality (int): JPEG quality (1-100)

145

146

Returns:

147

torch.Tensor: 1-D tensor containing JPEG bytes

148

"""

149

150

def encode_png(input: torch.Tensor, compression_level: int = 6) -> torch.Tensor:

151

"""

152

Encode tensor to PNG bytes.

153

154

Args:

155

input (torch.Tensor): Image tensor of shape (C, H, W) with values in [0, 255]

156

compression_level (int): PNG compression level (0-9)

157

158

Returns:

159

torch.Tensor: 1-D tensor containing PNG bytes

160

"""

161

```

162

163

#### File I/O Functions

164

165

Low-level file reading and writing functions.

166

167

```python { .api }

168

def read_file(path: str) -> torch.Tensor:

169

"""

170

Read file contents into bytes tensor.

171

172

Args:

173

path (str): Path to file

174

175

Returns:

176

torch.Tensor: 1-D tensor containing file bytes

177

"""

178

179

def write_file(filename: str, data: torch.Tensor) -> None:

180

"""

181

Write bytes tensor to file.

182

183

Args:

184

filename (str): Output file path

185

data (torch.Tensor): 1-D tensor containing bytes to write

186

"""

187

```

188

189

#### Image Reading Modes

190

191

Constants for specifying image reading modes.

192

193

```python { .api }

194

class ImageReadMode:

195

"""Image reading mode constants."""

196

UNCHANGED: int = 0 # Keep original format and channels

197

GRAY: int = 1 # Convert to single-channel grayscale

198

GRAY_ALPHA: int = 2 # Convert to grayscale with alpha channel

199

RGB: int = 3 # Convert to 3-channel RGB

200

RGB_ALPHA: int = 4 # Convert to RGB with alpha channel

201

```

202

203

### Video I/O

204

205

#### High-Level Video Functions

206

207

Convenient functions for reading and writing video files.

208

209

```python { .api }

210

def read_video(filename: str, start_pts: float = 0, end_pts: float = None, pts_unit: str = 'pts') -> tuple:

211

"""

212

Read video file and return video frames, audio frames, and info.

213

214

Args:

215

filename (str): Path to video file

216

start_pts (float): Start time for reading (in pts_unit)

217

end_pts (float, optional): End time for reading (in pts_unit)

218

pts_unit (str): Time unit ('pts' for presentation timestamp, 'sec' for seconds)

219

220

Returns:

221

tuple: (video_frames, audio_frames, video_info)

222

- video_frames (torch.Tensor): Video tensor of shape (T, H, W, C)

223

- audio_frames (torch.Tensor): Audio tensor of shape (K, L), where K is the number of channels and L is the number of sample points

224

- video_info (dict): Metadata for the video and audio streams; can contain 'video_fps' (float) and 'audio_fps' (int)

225

"""

226

227

def read_video_timestamps(filename: str, pts_unit: str = 'pts') -> tuple:

228

"""

229

Read video timestamps without loading frame data.

230

231

Args:

232

filename (str): Path to video file

233

pts_unit (str): Time unit for timestamps

234

235

Returns:

236

tuple: (video_pts, video_fps)

237

- video_pts (list): List of presentation timestamps

238

- video_fps (float): Video frame rate

239

"""

240

241

def write_video(filename: str, video_array: torch.Tensor, fps: float, video_codec: str = 'libx264', options=None) -> None:

242

"""

243

Write video tensor to file.

244

245

Args:

246

filename (str): Output video file path

247

video_array (torch.Tensor): Video tensor of shape (T, H, W, C) with values in [0, 255]

248

fps (float): Frame rate for output video

249

video_codec (str): Video codec to use ('libx264', 'mpeg4', etc.)

250

options (dict, optional): Additional encoding options

251

"""

252

```

253

254

#### Video Reader Class

255

256

Streaming video reader for efficient frame-by-frame processing.

257

258

```python { .api }

259

class VideoReader:

260

"""

261

Video reader for streaming video data frame by frame.

262

263

Args:

264

path (str): Path to video file

265

stream (str): Stream type ('video' or 'audio')

266

"""

267

268

def __init__(self, path: str, stream: str = 'video'): ...

269

270

def get_metadata(self) -> dict:

271

"""

272

Get video metadata information.

273

274

Returns:

275

dict: Metadata including duration, fps, resolution, codec info

276

"""

277

278

def set_current_stream(self, stream: str) -> None:

279

"""

280

Set current stream for reading.

281

282

Args:

283

stream (str): Stream type ('video' or 'audio')

284

"""

285

286

def seek(self, time_s: float) -> None:

287

"""

288

Seek to specific time in video.

289

290

Args:

291

time_s (float): Time in seconds to seek to

292

"""

293

294

def next(self) -> dict:

295

"""

296

Get next frame from video stream.

297

298

Returns:

299

dict: Frame data including 'data' tensor and 'pts' timestamp

300

"""

301

302

def __iter__(self):

303

"""Iterator interface for frame-by-frame reading."""

304

return self

305

306

def __next__(self) -> dict:

307

"""Get next frame in iterator."""

308

```

309

310

#### Low-Level Video Functions

311

312

Internal functions for advanced video processing.

313

314

```python { .api }

315

def _read_video_from_file(filename: str, start_pts: float = 0, end_pts: float = None, pts_unit: str = 'pts') -> tuple:

316

"""

317

Internal video reading from file.

318

319

Args:

320

filename (str): Path to video file

321

start_pts (float): Start time

322

end_pts (float, optional): End time

323

pts_unit (str): Time unit

324

325

Returns:

326

tuple: (video_frames, audio_frames, video_info)

327

"""

328

329

def _read_video_timestamps_from_file(filename: str, pts_unit: str = 'pts') -> tuple:

330

"""

331

Internal timestamp reading from file.

332

333

Args:

334

filename (str): Path to video file

335

pts_unit (str): Time unit

336

337

Returns:

338

tuple: (video_pts, video_fps)

339

"""

340

341

def _read_video_from_memory(video_data: torch.Tensor, start_pts: float = 0, end_pts: float = None, pts_unit: str = 'pts') -> tuple:

342

"""

343

Read video from memory buffer.

344

345

Args:

346

video_data (torch.Tensor): Video data bytes

347

start_pts (float): Start time

348

end_pts (float, optional): End time

349

pts_unit (str): Time unit

350

351

Returns:

352

tuple: (video_frames, audio_frames, video_info)

353

"""

354

355

def _read_video_timestamps_from_memory(video_data: torch.Tensor, pts_unit: str = 'pts') -> tuple:

356

"""

357

Read timestamps from memory buffer.

358

359

Args:

360

video_data (torch.Tensor): Video data bytes

361

pts_unit (str): Time unit

362

363

Returns:

364

tuple: (video_pts, video_fps)

365

"""

366

367

def _probe_video_from_file(filename: str) -> dict:

368

"""

369

Probe video file for metadata without reading frames.

370

371

Args:

372

filename (str): Path to video file

373

374

Returns:

375

dict: Video metadata

376

"""

377

378

def _probe_video_from_memory(video_data: torch.Tensor) -> dict:

379

"""

380

Probe video data for metadata without reading frames.

381

382

Args:

383

video_data (torch.Tensor): Video data bytes

384

385

Returns:

386

dict: Video metadata

387

"""

388

```

389

390

#### Video Metadata Classes

391

392

Classes for representing video metadata and timing information.

393

394

```python { .api }

395

class VideoMetaData:

396

"""

397

Container for video metadata information.

398

399

Attributes:

400

has_video (bool): Whether video stream is present

401

has_audio (bool): Whether audio stream is present

402

video_duration (float): Video duration in seconds

403

video_fps (float): Video frame rate

404

audio_sample_rate (int): Audio sample rate

405

video_codec (str): Video codec name

406

audio_codec (str): Audio codec name

407

"""

408

409

has_video: bool

410

has_audio: bool

411

video_duration: float

412

video_fps: float

413

audio_sample_rate: int

414

video_codec: str

415

audio_codec: str

416

417

class Timebase:

418

"""

419

Video timebase information for timestamp conversion.

420

421

Attributes:

422

numerator (int): Timebase numerator

423

denominator (int): Timebase denominator

424

"""

425

426

numerator: int

427

denominator: int

428

```

429

430

#### Video Backend Flags

431

432

Runtime flags indicating video decoding capabilities.

433

434

```python { .api }

435

_HAS_CPU_VIDEO_DECODER: bool # Whether CPU video decoder is available

436

_HAS_GPU_VIDEO_DECODER: bool # Whether GPU video decoder is available

437

_HAS_VIDEO_OPT: bool # Whether video optimization is available

438

```

439

440

## Usage Examples

441

442

### Basic Image Reading and Writing

443

444

```python

445

import torchvision.io as io

446

import torch

447

448

# Read image from file

449

image = io.read_image('input.jpg', mode='RGB')

450

print(f"Image shape: {image.shape}") # (C, H, W)

451

print(f"Image dtype: {image.dtype}") # torch.uint8

452

453

# Write image to file

454

io.write_jpeg(image, 'output.jpg', quality=95)

455

io.write_png(image, 'output.png', compression_level=3)

456

457

# Read with different modes

458

gray_image = io.read_image('input.jpg', mode='GRAY') # (1, H, W)

459

unchanged_image = io.read_image('input.jpg', mode='UNCHANGED') # Original format

460

```

461

462

### Image Encoding and Decoding

463

464

```python

465

import torchvision.io as io

466

import torch

467

468

# Read file as bytes

469

image_bytes = io.read_file('input.jpg')

470

print(f"File size: {image_bytes.shape[0]} bytes")

471

472

# Decode image from bytes

473

image = io.decode_jpeg(image_bytes, mode='RGB')

474

475

# Encode image back to bytes

476

encoded_jpeg = io.encode_jpeg(image, quality=90)

477

encoded_png = io.encode_png(image, compression_level=6)

478

479

# Write encoded bytes to file

480

io.write_file('output_encoded.jpg', encoded_jpeg)

481

io.write_file('output_encoded.png', encoded_png)

482

```

483

484

### Multi-Format Image Support

485

486

```python

487

import torchvision.io as io

488

489

# Support for various image formats

490

formats = ['jpg', 'png', 'gif', 'webp']

491

492

for fmt in formats:

493

try:

494

# Read image

495

image = io.read_image(f'input.{fmt}')

496

print(f"Successfully read {fmt}: {image.shape}")

497

498

# For GIF, handle multiple frames

499

if fmt == 'gif':

500

# GIF returns (N, C, H, W) for N frames

501

print(f"GIF frames: {image.shape[0]}")

502

503

except Exception as e:

504

print(f"Error reading {fmt}: {e}")

505

```

506

507

### Basic Video Reading

508

509

```python

510

import torchvision.io as io

511

512

# Read entire video

513

video_frames, audio_frames, video_info = io.read_video('input.mp4')

514

515

print(f"Video shape: {video_frames.shape}") # (T, H, W, C)

516

print(f"Audio shape: {audio_frames.shape}") # (K, L): channels x sample points

517

print(f"Video info: {video_info}")

518

519

# Read specific time range (5-10 seconds)

520

video_frames, audio_frames, info = io.read_video(

521

'input.mp4',

522

start_pts=5,

523

end_pts=10,

524

pts_unit='sec'

525

)

526

527

# Get video timestamps without loading frames

528

video_pts, video_fps = io.read_video_timestamps('input.mp4')

529

print(f"Video FPS: {video_fps}")

530

print(f"Number of frames: {len(video_pts)}")

531

```

532

533

### Streaming Video Processing

534

535

```python

536

import torchvision.io as io

537

import torch

538

539

# Create video reader for streaming

540

reader = io.VideoReader('large_video.mp4', 'video')

541

542

# Get metadata

543

metadata = reader.get_metadata()

544

print(f"Duration: {metadata['video']['duration'][0]} seconds")

545

print(f"FPS: {metadata['video']['fps'][0]}")

546

print(f"Resolution: {metadata['video']['width'][0]}x{metadata['video']['height'][0]}")

547

548

# Process video frame by frame

549

frame_count = 0

550

for frame_data in reader:

551

frame = frame_data['data'] # Shape: (C, H, W)

552

pts = frame_data['pts'] # Presentation timestamp

553

554

# Process frame here

555

# For example, apply transforms or run inference

556

557

frame_count += 1

558

if frame_count >= 100: # Process only first 100 frames

559

break

560

561

print(f"Processed {frame_count} frames")

562

563

# Seek to specific time and continue reading

564

reader.seek(30.0) # Seek to 30 seconds

565

frame_data = reader.next()

566

print(f"Frame at 30s has timestamp: {frame_data['pts']}")

567

```

568

569

### Video Writing

570

571

```python

572

import torchvision.io as io

573

import torch

574

575

# Create synthetic video data (100 frames, 480x640, RGB)

576

video_data = torch.randint(0, 256, (100, 480, 640, 3), dtype=torch.uint8)

577

578

# Write video with default settings

579

io.write_video('output.mp4', video_data, fps=30.0)

580

581

# Write with custom codec and options

582

io.write_video(

583

'output_hq.mp4',

584

video_data,

585

fps=30.0,

586

video_codec='libx264',

587

options={'crf': '18', 'preset': 'slow'} # High quality settings

588

)

589

590

# Write with different codec

591

io.write_video(

592

'output_fast.mp4',

593

video_data,

594

fps=30.0,

595

video_codec='mpeg4'

596

)

597

```

598

599

### Video Processing Pipeline

600

601

```python

602

import torchvision.io as io

603

import torchvision.transforms as transforms

604

import torch

605

606

def process_video_batch(input_path, output_path, transform=None):

607

"""

608

Process video by applying transforms to batches of frames.

609

"""

610

# Read video

611

video_frames, audio_frames, info = io.read_video(input_path)

612

613

# Convert from (T, H, W, C) to (T, C, H, W) for transforms

614

video_frames = video_frames.permute(0, 3, 1, 2).float() / 255.0

615

616

# Apply transforms if provided

617

if transform:

618

processed_frames = []

619

for frame in video_frames:

620

processed_frame = transform(frame)

621

processed_frames.append(processed_frame)

622

video_frames = torch.stack(processed_frames)

623

624

# Convert back to (T, H, W, C) and uint8 for writing

625

video_frames = video_frames.permute(0, 2, 3, 1)

626

video_frames = (video_frames * 255).byte()

627

628

# Write processed video

629

io.write_video(output_path, video_frames, fps=info['video_fps'])

630

631

# Define processing pipeline

632

transform = transforms.Compose([

633

transforms.Resize((224, 224)),

634

transforms.ColorJitter(brightness=0.2, contrast=0.2),

635

transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

636

])

637

638

# Process video

639

process_video_batch('input.mp4', 'processed.mp4', transform)

640

```

641

642

### Memory-Efficient Video Processing

643

644

```python

645

import torchvision.io as io

646

import torch

647

648

def process_large_video(input_path, output_path, batch_size=32):

649

"""

650

Process large video in batches to manage memory usage.

651

"""

652

reader = io.VideoReader(input_path, 'video')

653

metadata = reader.get_metadata()

654

fps = metadata['video']['fps'][0]

655

656

processed_frames = []

657

batch = []

658

659

for frame_data in reader:

660

frame = frame_data['data'].float() / 255.0 # Normalize to [0, 1]

661

batch.append(frame)

662

663

# Process batch when full

664

if len(batch) == batch_size:

665

batch_tensor = torch.stack(batch)

666

667

# Apply batch processing here (e.g., model inference)

668

# For example, apply a simple transform

669

processed_batch = torch.flip(batch_tensor, dims=[2]) # Vertical flip: dim 2 is height in (B, C, H, W); use dims=[3] for a horizontal flip

670

671

processed_frames.extend(processed_batch)

672

batch = []

673

674

# Process remaining frames

675

if batch:

676

batch_tensor = torch.stack(batch)

677

processed_batch = torch.flip(batch_tensor, dims=[2])

678

processed_frames.extend(processed_batch)

679

680

# Stack all processed frames and convert back to uint8

681

all_frames = torch.stack(processed_frames)

682

all_frames = (all_frames * 255).byte().permute(0, 2, 3, 1) # (T, H, W, C)

683

684

# Write output video

685

io.write_video(output_path, all_frames, fps=fps)

686

687

# Process video in batches

688

process_large_video('large_input.mp4', 'large_output.mp4', batch_size=16)

689

```