Tessl Tile for pypi/torchvision@0.23.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

datasets.md index.md io.md models.md ops.md transforms.md tv_tensors.md utils.md

docs/io.md

0
# I/O Operations
1

2
The TorchVision I/O module provides efficient image and video reading, writing, and processing capabilities, with support for multiple formats and backends. It offers both high-level convenience functions and low-level streaming interfaces for a variety of multimedia formats.
3

4
## Capabilities
5

6
### Image I/O
7

8
#### Image Reading Functions
9

10
Functions for reading various image formats into tensors.
11

12
```python { .api }
13
def read_image(path: str, mode: str = 'UNCHANGED') -> torch.Tensor:
14
    """
15
    Read image file and return as tensor.
16
    
17
    Args:
18
        path (str): Path to image file
19
        mode (str): Image mode ('RGB', 'GRAY', 'UNCHANGED')
20
                   - RGB: Convert to 3-channel RGB
21
                   - GRAY: Convert to 1-channel grayscale  
22
                   - UNCHANGED: Keep original format
23
    
24
    Returns:
25
        torch.Tensor: Image tensor of shape (C, H, W) with values in [0, 255]
26
    """
27

28
def decode_image(input: torch.Tensor, mode: str = 'UNCHANGED') -> torch.Tensor:
29
    """
30
    Decode image from bytes tensor.
31
    
32
    Args:
33
        input (torch.Tensor): 1-D tensor containing encoded image bytes
34
        mode (str): Image mode for decoding
35
    
36
    Returns:
37
        torch.Tensor: Decoded image tensor
38
    """
39

40
def decode_jpeg(input: torch.Tensor, mode: str = 'UNCHANGED', device: str = 'cpu') -> torch.Tensor:
41
    """
42
    Decode JPEG image from bytes.
43
    
44
    Args:
45
        input (torch.Tensor): 1-D tensor containing JPEG bytes
46
        mode (str): Image mode ('RGB', 'GRAY', 'UNCHANGED')  
47
        device (str): Device to place output tensor ('cpu' or 'cuda')
48
    
49
    Returns:
50
        torch.Tensor: Decoded JPEG image tensor
51
    """
52

53
def decode_png(input: torch.Tensor, mode: str = 'UNCHANGED') -> torch.Tensor:
54
    """
55
    Decode PNG image from bytes.
56
    
57
    Args:
58
        input (torch.Tensor): 1-D tensor containing PNG bytes
59
        mode (str): Image mode for decoding
60
    
61
    Returns:
62
        torch.Tensor: Decoded PNG image tensor
63
    """
64

65
def decode_gif(input: torch.Tensor) -> torch.Tensor:
66
    """
67
    Decode GIF image from bytes.
68
    
69
    Args:
70
        input (torch.Tensor): 1-D tensor containing GIF bytes
71
    
72
    Returns:
73
        torch.Tensor: Decoded GIF frames tensor of shape (N, C, H, W)
74
    """
75

76
def decode_webp(input: torch.Tensor, mode: str = 'RGB') -> torch.Tensor:
77
    """
78
    Decode WebP image from bytes.
79
    
80
    Args:
81
        input (torch.Tensor): 1-D tensor containing WebP bytes  
82
        mode (str): Image mode for decoding
83
    
84
    Returns:
85
        torch.Tensor: Decoded WebP image tensor
86
    """
87

88
def decode_avif(input: torch.Tensor, mode: str = 'RGB') -> torch.Tensor:
89
    """
90
    Decode AVIF image from bytes.
91
    
92
    Args:
93
        input (torch.Tensor): 1-D tensor containing AVIF bytes
94
        mode (str): Image mode for decoding
95
    
96
    Returns:
97
        torch.Tensor: Decoded AVIF image tensor
98
    """
99

100
def decode_heic(input: torch.Tensor, mode: str = 'RGB') -> torch.Tensor:
101
    """
102
    Decode HEIC image from bytes.
103
    
104
    Args:
105
        input (torch.Tensor): 1-D tensor containing HEIC bytes
106
        mode (str): Image mode for decoding
107
    
108
    Returns:
109
        torch.Tensor: Decoded HEIC image tensor
110
    """
111
```
112

113
#### Image Writing Functions
114

115
Functions for encoding and writing tensors as image files.
116

117
```python { .api }
118
def write_jpeg(input: torch.Tensor, filename: str, quality: int = 75) -> None:
119
    """
120
    Write tensor as JPEG file.
121
    
122
    Args:
123
        input (torch.Tensor): Image tensor of shape (C, H, W) with values in [0, 255]
124
        filename (str): Output file path
125
        quality (int): JPEG quality (1-100, higher is better quality)
126
    """
127

128
def write_png(input: torch.Tensor, filename: str, compression_level: int = 6) -> None:
129
    """
130
    Write tensor as PNG file.
131
    
132
    Args:
133
        input (torch.Tensor): Image tensor of shape (C, H, W) with values in [0, 255]
134
        filename (str): Output file path  
135
        compression_level (int): PNG compression level (0-9, higher is smaller file)
136
    """
137

138
def encode_jpeg(input: torch.Tensor, quality: int = 75) -> torch.Tensor:
139
    """
140
    Encode tensor to JPEG bytes.
141
    
142
    Args:
143
        input (torch.Tensor): Image tensor of shape (C, H, W) with values in [0, 255]
144
        quality (int): JPEG quality (1-100)
145
    
146
    Returns:
147
        torch.Tensor: 1-D tensor containing JPEG bytes
148
    """
149

150
def encode_png(input: torch.Tensor, compression_level: int = 6) -> torch.Tensor:
151
    """
152
    Encode tensor to PNG bytes.
153
    
154
    Args:
155
        input (torch.Tensor): Image tensor of shape (C, H, W) with values in [0, 255]  
156
        compression_level (int): PNG compression level (0-9)
157
    
158
    Returns:
159
        torch.Tensor: 1-D tensor containing PNG bytes
160
    """
161
```
162

163
#### File I/O Functions
164

165
Low-level file reading and writing functions.
166

167
```python { .api }
168
def read_file(path: str) -> torch.Tensor:
169
    """
170
    Read file contents into bytes tensor.
171
    
172
    Args:
173
        path (str): Path to file
174
    
175
    Returns:
176
        torch.Tensor: 1-D tensor containing file bytes
177
    """
178

179
def write_file(filename: str, data: torch.Tensor) -> None:
180
    """
181
    Write bytes tensor to file.
182
    
183
    Args:
184
        filename (str): Output file path
185
        data (torch.Tensor): 1-D tensor containing bytes to write
186
    """
187
```
188

189
#### Image Reading Modes
190

191
Constants for specifying image reading modes.
192

193
```python { .api }
194
class ImageReadMode:
195
    """Image reading mode constants."""
196
    UNCHANGED: int = 0    # Keep original format and channels
197
    GRAY: int = 1         # Convert to single-channel grayscale
198
    GRAY_ALPHA: int = 2   # Convert to grayscale with alpha channel
199
    RGB: int = 3          # Convert to 3-channel RGB
200
    RGB_ALPHA: int = 4    # Convert to RGB with alpha channel
201
```
202

203
### Video I/O
204

205
#### High-Level Video Functions
206

207
Convenient functions for reading and writing video files.
208

209
```python { .api }
210
def read_video(filename: str, start_pts: float = 0, end_pts: float = None, pts_unit: str = 'pts') -> tuple:
211
    """
212
    Read video file and return video frames, audio frames, and info.
213
    
214
    Args:
215
        filename (str): Path to video file
216
        start_pts (float): Start time for reading (in pts_unit)
217
        end_pts (float, optional): End time for reading (in pts_unit)  
218
        pts_unit (str): Time unit ('pts' for presentation timestamp, 'sec' for seconds)
219
    
220
    Returns:
221
        tuple: (video_frames, audio_frames, video_info)
222
            - video_frames (torch.Tensor): Video tensor of shape (T, H, W, C)
223
            - audio_frames (torch.Tensor): Audio tensor of shape (K, L), where K is the number of channels and L the number of samples
224
            - video_info (dict): Metadata for the video and audio; may contain 'video_fps' (float) and 'audio_fps' (int)
225
    """
226

227
def read_video_timestamps(filename: str, pts_unit: str = 'pts') -> tuple:
228
    """
229
    Read video timestamps without loading frame data.
230
    
231
    Args:
232
        filename (str): Path to video file
233
        pts_unit (str): Time unit for timestamps
234
    
235
    Returns:
236
        tuple: (video_pts, video_fps) 
237
            - video_pts (list): List of presentation timestamps
238
            - video_fps (float): Video frame rate
239
    """
240

241
def write_video(filename: str, video_array: torch.Tensor, fps: float, video_codec: str = 'libx264', options=None) -> None:
242
    """
243
    Write video tensor to file.
244
    
245
    Args:
246
        filename (str): Output video file path
247
        video_array (torch.Tensor): Video tensor of shape (T, H, W, C) with values in [0, 255]
248
        fps (float): Frame rate for output video
249
        video_codec (str): Video codec to use ('libx264', 'mpeg4', etc.)
250
        options (dict, optional): Additional encoding options
251
    """
252
```
253

254
#### Video Reader Class
255

256
Streaming video reader for efficient frame-by-frame processing.
257

258
```python { .api }
259
class VideoReader:
260
    """
261
    Video reader for streaming video data frame by frame.
262
    
263
    Args:
264
        path (str): Path to video file
265
        stream (str): Stream type ('video' or 'audio')
266
    """
267
    
268
    def __init__(self, path: str, stream: str = 'video'): ...
269
    
270
    def get_metadata(self) -> dict:
271
        """
272
        Get video metadata information.
273
        
274
        Returns:
275
            dict: Metadata including duration, fps, resolution, codec info
276
        """
277
    
278
    def set_current_stream(self, stream: str) -> None:
279
        """
280
        Set current stream for reading.
281
        
282
        Args:
283
            stream (str): Stream type ('video' or 'audio')
284
        """
285
    
286
    def seek(self, time_s: float) -> None:
287
        """
288
        Seek to specific time in video.
289
        
290
        Args:
291
            time_s (float): Time in seconds to seek to
292
        """
293
    
294
    def next(self) -> dict:
295
        """
296
        Get next frame from video stream.
297
        
298
        Returns:
299
            dict: Frame data including 'data' tensor and 'pts' timestamp
300
        """
301
    
302
    def __iter__(self):
303
        """Iterator interface for frame-by-frame reading."""
304
        return self
305
    
306
    def __next__(self) -> dict:
307
        """Get next frame in iterator."""
308
```
309

310
#### Low-Level Video Functions
311

312
Internal functions for advanced video processing.
313

314
```python { .api }
315
def _read_video_from_file(filename: str, start_pts: float = 0, end_pts: float = None, pts_unit: str = 'pts') -> tuple:
316
    """
317
    Internal video reading from file.
318
    
319
    Args:
320
        filename (str): Path to video file
321
        start_pts (float): Start time
322
        end_pts (float, optional): End time
323
        pts_unit (str): Time unit
324
    
325
    Returns:
326
        tuple: (video_frames, audio_frames, video_info)
327
    """
328

329
def _read_video_timestamps_from_file(filename: str, pts_unit: str = 'pts') -> tuple:
330
    """
331
    Internal timestamp reading from file.
332
    
333
    Args:
334
        filename (str): Path to video file
335
        pts_unit (str): Time unit
336
    
337
    Returns:
338
        tuple: (video_pts, video_fps)
339
    """
340

341
def _read_video_from_memory(video_data: torch.Tensor, start_pts: float = 0, end_pts: float = None, pts_unit: str = 'pts') -> tuple:
342
    """
343
    Read video from memory buffer.
344
    
345
    Args:
346
        video_data (torch.Tensor): Video data bytes
347
        start_pts (float): Start time
348
        end_pts (float, optional): End time
349
        pts_unit (str): Time unit
350
    
351
    Returns:
352
        tuple: (video_frames, audio_frames, video_info)
353
    """
354

355
def _read_video_timestamps_from_memory(video_data: torch.Tensor, pts_unit: str = 'pts') -> tuple:
356
    """
357
    Read timestamps from memory buffer.
358
    
359
    Args:
360
        video_data (torch.Tensor): Video data bytes
361
        pts_unit (str): Time unit
362
    
363
    Returns:
364
        tuple: (video_pts, video_fps)
365
    """
366

367
def _probe_video_from_file(filename: str) -> dict:
368
    """
369
    Probe video file for metadata without reading frames.
370
    
371
    Args:
372
        filename (str): Path to video file
373
    
374
    Returns:
375
        dict: Video metadata
376
    """
377

378
def _probe_video_from_memory(video_data: torch.Tensor) -> dict:
379
    """
380
    Probe video data for metadata without reading frames.
381
    
382
    Args:
383
        video_data (torch.Tensor): Video data bytes
384
    
385
    Returns:
386
        dict: Video metadata
387
    """
388
```
389

390
#### Video Metadata Classes
391

392
Classes for representing video metadata and timing information.
393

394
```python { .api }
395
class VideoMetaData:
396
    """
397
    Container for video metadata information.
398
    
399
    Attributes:
400
        has_video (bool): Whether video stream is present
401
        has_audio (bool): Whether audio stream is present
402
        video_duration (float): Video duration in seconds
403
        video_fps (float): Video frame rate
404
        audio_sample_rate (int): Audio sample rate
405
        video_codec (str): Video codec name
406
        audio_codec (str): Audio codec name
407
    """
408
    
409
    has_video: bool
410
    has_audio: bool
411
    video_duration: float
412
    video_fps: float
413
    audio_sample_rate: int
414
    video_codec: str
415
    audio_codec: str
416

417
class Timebase:
418
    """
419
    Video timebase information for timestamp conversion.
420
    
421
    Attributes:
422
        numerator (int): Timebase numerator
423
        denominator (int): Timebase denominator
424
    """
425
    
426
    numerator: int
427
    denominator: int
428
```
429

430
#### Video Backend Flags
431

432
Runtime flags indicating video decoding capabilities.
433

434
```python { .api }
435
_HAS_CPU_VIDEO_DECODER: bool  # Whether CPU video decoder is available
436
_HAS_GPU_VIDEO_DECODER: bool  # Whether GPU video decoder is available
437
_HAS_VIDEO_OPT: bool          # Whether video optimization is available
438
```
439

440
## Usage Examples
441

442
### Basic Image Reading and Writing
443

444
```python
445
import torchvision.io as io
446
import torch
447

448
# Read image from file
449
image = io.read_image('input.jpg', mode='RGB')
450
print(f"Image shape: {image.shape}")  # (C, H, W)
451
print(f"Image dtype: {image.dtype}")  # torch.uint8
452

453
# Write image to file
454
io.write_jpeg(image, 'output.jpg', quality=95)
455
io.write_png(image, 'output.png', compression_level=3)
456

457
# Read with different modes
458
gray_image = io.read_image('input.jpg', mode='GRAY')  # (1, H, W)
459
unchanged_image = io.read_image('input.jpg', mode='UNCHANGED')  # Original format
460
```
461

462
### Image Encoding and Decoding
463

464
```python
465
import torchvision.io as io
466
import torch
467

468
# Read file as bytes
469
image_bytes = io.read_file('input.jpg')
470
print(f"File size: {image_bytes.shape[0]} bytes")
471

472
# Decode image from bytes
473
image = io.decode_jpeg(image_bytes, mode='RGB')
474

475
# Encode image back to bytes
476
encoded_jpeg = io.encode_jpeg(image, quality=90)
477
encoded_png = io.encode_png(image, compression_level=6)
478

479
# Write encoded bytes to file
480
io.write_file('output_encoded.jpg', encoded_jpeg)
481
io.write_file('output_encoded.png', encoded_png)
482
```
483

484
### Multi-Format Image Support
485

486
```python
487
import torchvision.io as io
488

489
# Support for various image formats
490
formats = ['jpg', 'png', 'gif', 'webp']
491

492
for fmt in formats:
493
    try:
494
        # Read image
495
        image = io.read_image(f'input.{fmt}')
496
        print(f"Successfully read {fmt}: {image.shape}")
497
        
498
        # For GIF, handle multiple frames
499
        if fmt == 'gif':
500
            # GIF returns (N, C, H, W) for N frames
501
            print(f"GIF frames: {image.shape[0]}")
502
            
503
    except Exception as e:
504
        print(f"Error reading {fmt}: {e}")
505
```
506

507
### Basic Video Reading
508

509
```python
510
import torchvision.io as io
511

512
# Read entire video
513
video_frames, audio_frames, video_info = io.read_video('input.mp4')
514

515
print(f"Video shape: {video_frames.shape}")  # (T, H, W, C)
516
print(f"Audio shape: {audio_frames.shape}")  # (K, L): channels x samples
517
print(f"Video info: {video_info}")
518

519
# Read specific time range (5-10 seconds)
520
video_frames, audio_frames, info = io.read_video(
521
    'input.mp4', 
522
    start_pts=5, 
523
    end_pts=10, 
524
    pts_unit='sec'
525
)
526

527
# Get video timestamps without loading frames
528
video_pts, video_fps = io.read_video_timestamps('input.mp4')
529
print(f"Video FPS: {video_fps}")
530
print(f"Number of frames: {len(video_pts)}")
531
```
532

533
### Streaming Video Processing
534

535
```python
536
import torchvision.io as io
537
import torch
538

539
# Create video reader for streaming
540
reader = io.VideoReader('large_video.mp4', 'video')
541

542
# Get metadata
543
metadata = reader.get_metadata()
544
print(f"Duration: {metadata['video']['duration'][0]} seconds")
545
print(f"FPS: {metadata['video']['fps'][0]}")
546
print(f"Resolution: {metadata['video']['width'][0]}x{metadata['video']['height'][0]}")
547

548
# Process video frame by frame
549
frame_count = 0
550
for frame_data in reader:
551
    frame = frame_data['data']  # Shape: (C, H, W)
552
    pts = frame_data['pts']     # Presentation timestamp
553
    
554
    # Process frame here
555
    # For example, apply transforms or run inference
556
    
557
    frame_count += 1
558
    if frame_count >= 100:  # Process only first 100 frames
559
        break
560

561
print(f"Processed {frame_count} frames")
562

563
# Seek to specific time and continue reading
564
reader.seek(30.0)  # Seek to 30 seconds
565
frame_data = next(reader)
566
print(f"Frame at 30s has timestamp: {frame_data['pts']}")
567
```
568

569
### Video Writing
570

571
```python
572
import torchvision.io as io
573
import torch
574

575
# Create synthetic video data (100 frames, 480x640, RGB)
576
video_data = torch.randint(0, 256, (100, 480, 640, 3), dtype=torch.uint8)
577

578
# Write video with default settings
579
io.write_video('output.mp4', video_data, fps=30.0)
580

581
# Write with custom codec and options
582
io.write_video(
583
    'output_hq.mp4',
584
    video_data, 
585
    fps=30.0,
586
    video_codec='libx264',
587
    options={'crf': '18', 'preset': 'slow'}  # High quality settings
588
)
589

590
# Write with different codec
591
io.write_video(
592
    'output_fast.mp4',
593
    video_data,
594
    fps=30.0,
595
    video_codec='mpeg4'
596
)
597
```
598

599
### Video Processing Pipeline
600

601
```python
602
import torchvision.io as io
603
import torchvision.transforms as transforms
604
import torch
605

606
def process_video_batch(input_path, output_path, transform=None):
607
    """
608
    Process video by applying transforms to batches of frames.
609
    """
610
    # Read video
611
    video_frames, audio_frames, info = io.read_video(input_path)
612
    
613
    # Convert from (T, H, W, C) to (T, C, H, W) for transforms
614
    video_frames = video_frames.permute(0, 3, 1, 2).float() / 255.0
615
    
616
    # Apply transforms if provided
617
    if transform:
618
        processed_frames = []
619
        for frame in video_frames:
620
            processed_frame = transform(frame)
621
            processed_frames.append(processed_frame)
622
        video_frames = torch.stack(processed_frames)
623
    
624
    # Convert back to (T, H, W, C) and uint8 for writing
625
    video_frames = video_frames.permute(0, 2, 3, 1)
626
    video_frames = (video_frames.clamp(0, 1) * 255).byte()  # clamp first: transformed values may fall outside [0, 1]
627
    
628
    # Write processed video
629
    io.write_video(output_path, video_frames, fps=info['video_fps'])
630

631
# Define processing pipeline
632
transform = transforms.Compose([
633
    transforms.Resize((224, 224)),
634
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
635
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
636
])
637

638
# Process video
639
process_video_batch('input.mp4', 'processed.mp4', transform)
640
```
641

642
### Memory-Efficient Video Processing
643

644
```python
645
import torchvision.io as io
646
import torch
647

648
def process_large_video(input_path, output_path, batch_size=32):
649
    """
650
    Process large video in batches to manage memory usage.
651
    """
652
    reader = io.VideoReader(input_path, 'video')
653
    metadata = reader.get_metadata()
654
    fps = metadata['video']['fps'][0]
655
    
656
    processed_frames = []
657
    batch = []
658
    
659
    for frame_data in reader:
660
        frame = frame_data['data'].float() / 255.0  # Normalize to [0, 1]
661
        batch.append(frame)
662
        
663
        # Process batch when full
664
        if len(batch) == batch_size:
665
            batch_tensor = torch.stack(batch)
666
            
667
            # Apply batch processing here (e.g., model inference)
668
            # For example, apply a simple transform
669
            processed_batch = torch.flip(batch_tensor, dims=[3])  # Horizontal flip (W axis of (B, C, H, W))
670
            
671
            processed_frames.extend(processed_batch)
672
            batch = []
673
    
674
    # Process remaining frames
675
    if batch:
676
        batch_tensor = torch.stack(batch)
677
        processed_batch = torch.flip(batch_tensor, dims=[3])
678
        processed_frames.extend(processed_batch)
679
    
680
    # Stack all processed frames and convert back to uint8
681
    all_frames = torch.stack(processed_frames)
682
    all_frames = (all_frames * 255).byte().permute(0, 2, 3, 1)  # (T, H, W, C)
683
    
684
    # Write output video
685
    io.write_video(output_path, all_frames, fps=fps)
686

687
# Process video in batches
688
process_large_video('large_input.mp4', 'large_output.mp4', batch_size=16)
689
```

Version

Tile

Files

docs/io.md

io.mddocs/