CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-pytube

Python library for downloading YouTube videos with comprehensive stream management and metadata extraction capabilities.

Pending
Overview
Eval results
Files

docs/stream-management.md

Stream Management and Filtering

Advanced stream selection and filtering capabilities for choosing specific video qualities, formats, and codecs from available YouTube video and audio streams.

Capabilities

Stream Class

Represents an individual downloadable video or audio stream with specific quality, format, and codec characteristics.

class Stream:
    def __init__(self, stream: Dict, monostate: Monostate):
        """
        Initialize a Stream object.
        
        Args:
            stream (dict): Stream metadata dictionary
            monostate (Monostate): Shared state object for callbacks
        """

Basic Stream Properties

Access fundamental stream characteristics including URL, format, and technical specifications.

@property
def url(self) -> str:
    """Get the direct download URL for the stream."""

@property
def itag(self) -> int:
    """Get the YouTube internal stream identifier."""

@property
def mime_type(self) -> str:
    """Get the MIME type (e.g., 'video/mp4', 'audio/webm')."""

@property
def type(self) -> str:
    """Get the media type (e.g., 'video', 'audio')."""

@property
def subtype(self) -> str:  
    """Get the media subtype (e.g., 'mp4', 'webm')."""

@property
def codecs(self) -> List[str]:
    """Get the list of codecs used in the stream."""

@property
def video_codec(self) -> Optional[str]:
    """Get the video codec (e.g., 'avc1.640028')."""

@property
def audio_codec(self) -> Optional[str]:
    """Get the audio codec (e.g., 'mp4a.40.2')."""

File Size Information

Access stream file size in various units.

@property
def filesize(self) -> int:
    """Get the file size in bytes."""

@property
def filesize_kb(self) -> float:
    """Get the file size in kilobytes."""

@property  
def filesize_mb(self) -> float:
    """Get the file size in megabytes."""

@property
def filesize_gb(self) -> float:
    """Get the file size in gigabytes."""

@property
def filesize_approx(self) -> int:
    """Get the approximate file size in bytes."""

Stream Type Classification

Determine stream characteristics and content types.

@property
def is_progressive(self) -> bool:
    """Check if stream contains both audio and video."""

@property
def is_adaptive(self) -> bool:
    """Check if stream is DASH (audio or video only)."""

@property
def includes_audio_track(self) -> bool:
    """Check if stream contains audio."""

@property
def includes_video_track(self) -> bool:
    """Check if stream contains video."""

@property
def is_dash(self) -> bool:
    """Check if stream uses DASH protocol."""

@property
def is_otf(self) -> bool:
    """Check if stream is On-The-Fly generated."""

Video Stream Properties

Properties specific to video streams.

@property
def resolution(self) -> Optional[str]:
    """Get the video resolution (e.g., '720p', '1080p').

    NOTE(review): the "Stream Information Analysis" usage example falls back
    to ``stream.abr`` when this value is falsy, so it is presumably ``None``
    for audio-only streams -- confirm against the library.
    """

@property
def fps(self) -> int:
    """Get the frames per second for video streams."""

@property
def is_3d(self) -> bool:
    """Check if the video stream is 3D."""

@property
def is_hdr(self) -> bool:
    """Check if the video stream supports HDR."""

@property
def is_live(self) -> bool:
    """Check if the stream is from a live broadcast."""

Audio Stream Properties

Properties specific to audio streams.

@property
def abr(self) -> str:
    """Get the audio bitrate (e.g., '128kbps')."""

@property
def bitrate(self) -> Optional[int]:
    """Get the bitrate in bits per second."""

File Operations

Download and file management operations.

def download(
    self,
    output_path: Optional[str] = None,
    filename: Optional[str] = None,
    filename_prefix: Optional[str] = None,
    skip_existing: bool = True,
    timeout: Optional[int] = None,
    max_retries: Optional[int] = 0
) -> str:
    """
    Download the stream to a file.

    Args:
        output_path (str, optional): Directory to save the file
        filename (str, optional): Custom filename (without extension).
            NOTE(review): recent pytube releases appear to use this value
            verbatim, so the extension (e.g. '.mp4') may need to be
            included -- confirm against the targeted pytube version.
        filename_prefix (str, optional): Prefix to add to filename
        skip_existing (bool): Skip download if file already exists
        timeout (int, optional): Request timeout in seconds
        max_retries (int, optional): Maximum number of retry attempts

    Returns:
        str: Path to the downloaded file
    """

def stream_to_buffer(self, buffer: BinaryIO) -> None:
    """
    Stream content directly to a buffer.
    
    Args:
        buffer (BinaryIO): Buffer to write stream content to
    """

def get_file_path(
    self,
    filename: Optional[str] = None,
    output_path: Optional[str] = None,
    filename_prefix: Optional[str] = None
) -> str:
    """
    Get the full file path for the download.
    
    Args:
        filename (str, optional): Custom filename
        output_path (str, optional): Directory path
        filename_prefix (str, optional): Filename prefix
        
    Returns:
        str: Complete file path
    """

@property
def default_filename(self) -> str:
    """Get the auto-generated filename for the stream."""

def exists_at_path(self, file_path: str) -> bool:
    """
    Check if file exists at the specified path.
    
    Args:
        file_path (str): Path to check
        
    Returns:
        bool: True if file exists
    """

Stream Metadata

Additional stream information and metadata.

@property
def title(self) -> str:
    """Get the video title associated with this stream."""

@property
def expiration(self) -> datetime:
    """Get the stream URL expiration datetime."""

StreamQuery Class

Query and filtering interface for stream collections with chainable methods for advanced stream selection.

class StreamQuery:
    def __init__(self, fmt_streams: List[Stream]):
        """
        Initialize StreamQuery with a list of streams.
        
        Args:
            fmt_streams (List[Stream]): List of available streams
        """

Stream Filtering

Filter streams by various criteria with chainable methods.

def filter(
    self,
    fps=None,
    res=None,
    resolution=None,
    mime_type=None,
    type=None,
    subtype=None,
    file_extension=None,
    abr=None,
    bitrate=None,
    video_codec=None,
    audio_codec=None,
    only_audio=None,
    only_video=None,
    progressive=None,
    adaptive=None,
    is_dash=None,
    custom_filter_functions=None
) -> StreamQuery:
    """
    Filter streams by multiple criteria.

    Every argument defaults to None. The result is itself a StreamQuery,
    so calls can be chained with further filtering, ordering, and
    selection methods.

    Args:
        fps (int, optional): Frames per second
        res (str, optional): Resolution (e.g., '720p')
        resolution (str, optional): Alias for res
        mime_type (str, optional): MIME type filter
        type (str, optional): Media type ('video', 'audio')
        subtype (str, optional): Media subtype ('mp4', 'webm')
        file_extension (str, optional): File extension filter
        abr (str, optional): Audio bitrate
        bitrate (int, optional): Bitrate filter
        video_codec (str, optional): Video codec filter
        audio_codec (str, optional): Audio codec filter
        only_audio (bool, optional): Audio-only streams
        only_video (bool, optional): Video-only streams
        progressive (bool, optional): Progressive streams
        adaptive (bool, optional): Adaptive (DASH) streams
        is_dash (bool, optional): DASH protocol streams
        custom_filter_functions (List[Callable[[Stream], bool]], optional):
            Custom filter functions; each takes a Stream and returns a
            bool (e.g., ``lambda s: s.filesize > 100_000_000``)

    Returns:
        StreamQuery: Filtered stream query object
    """

def otf(self, is_otf: bool = False) -> StreamQuery:
    """
    Filter streams by On-The-Fly generation status.
    
    Args:
        is_otf (bool): Filter for OTF streams
        
    Returns:
        StreamQuery: Filtered stream query
    """

Stream Ordering

Sort and order streams by attributes.

def order_by(self, attribute_name: str) -> StreamQuery:
    """
    Order streams by a specific attribute.
    
    Args:
        attribute_name (str): Stream attribute to order by
        
    Returns:
        StreamQuery: Ordered stream query
    """

def desc(self) -> StreamQuery:
    """
    Order streams in descending order.
    
    Returns:
        StreamQuery: Stream query with descending order
    """

def asc(self) -> StreamQuery:
    """
    Order streams in ascending order.
    
    Returns:
        StreamQuery: Stream query with ascending order
    """

Stream Selection

Get specific streams from the filtered results.

def get_by_itag(self, itag: int) -> Optional[Stream]:
    """
    Get stream by YouTube itag identifier.
    
    Args:
        itag (int): YouTube itag identifier
        
    Returns:
        Stream or None: Stream with matching itag
    """

def get_by_resolution(self, resolution: str) -> Optional[Stream]:
    """
    Get stream by resolution.
    
    Args:
        resolution (str): Target resolution (e.g., '720p')
        
    Returns:
        Stream or None: Stream with matching resolution
    """

def get_highest_resolution(self) -> Optional[Stream]:
    """
    Get the progressive stream with the highest resolution.

    Returns:
        Stream or None: Highest resolution progressive stream
    """

def get_lowest_resolution(self) -> Optional[Stream]:
    """
    Get the stream with the lowest resolution.
    
    Returns:
        Stream or None: Lowest resolution stream
    """

def get_audio_only(self, subtype: str = "mp4") -> Optional[Stream]:
    """
    Get an audio-only stream.
    
    Args:
        subtype (str): Preferred audio format
        
    Returns:
        Stream or None: Audio-only stream
    """

def first(self) -> Optional[Stream]:
    """
    Get the first stream in the filtered results.
    
    Returns:
        Stream or None: First stream
    """

def last(self) -> Optional[Stream]:
    """
    Get the last stream in the filtered results.
    
    Returns:
        Stream or None: Last stream
    """

Sequence Operations

Access streams using sequence operations.

def __getitem__(self, i: Union[slice, int]) -> Union[Stream, List[Stream]]:
    """
    Get stream(s) by index or slice.
    
    Args:
        i (int or slice): Index or slice object
        
    Returns:
        Stream or List[Stream]: Stream(s) at specified index/slice
    """

def __len__(self) -> int:
    """
    Get the number of streams in the query.
    
    Returns:
        int: Number of streams
    """

Deprecated Methods

Legacy methods maintained for backward compatibility.

def count(self, value: Optional[str] = None) -> int:
    """
    Get the count of items in the list.
    
    **DEPRECATED**: Use len() directly on the StreamQuery object instead.
    
    Args:
        value (str, optional): Specific value to count
        
    Returns:
        int: Count of streams or specific value occurrences
    """

def all(self) -> List[Stream]:
    """
    Get all the results represented by this query as a list.
    
    **DEPRECATED**: StreamQuery can be treated as a list directly.
    
    Returns:
        List[Stream]: All streams in the query
    """

Usage Examples

Basic Stream Selection

from pytube import YouTube

yt = YouTube('https://www.youtube.com/watch?v=9bZkp7q19f0')

# Get all available streams
streams = yt.streams

# Get highest resolution progressive stream
best_stream = streams.get_highest_resolution()
print(f"Best quality: {best_stream.resolution} - {best_stream.filesize_mb:.1f}MB")

# Get audio-only stream
audio_stream = streams.get_audio_only()
print(f"Audio: {audio_stream.abr} - {audio_stream.filesize_mb:.1f}MB")

Advanced Filtering

from pytube import YouTube

yt = YouTube('https://www.youtube.com/watch?v=9bZkp7q19f0')

# Filter by multiple criteria
hd_mp4_streams = yt.streams.filter(
    resolution='720p',
    file_extension='mp4',
    progressive=True
)

# Custom filtering with lambda functions
large_files = yt.streams.filter(
    custom_filter_functions=[lambda s: s.filesize > 100_000_000]  # > 100MB
)

# Chain filters and ordering
best_audio = (yt.streams
              .filter(only_audio=True, file_extension='mp4')
              .order_by('abr')
              .desc()
              .first())

Stream Information Analysis

from pytube import YouTube

yt = YouTube('https://www.youtube.com/watch?v=9bZkp7q19f0')

# Analyze all available streams
for stream in yt.streams:
    print(f"itag: {stream.itag}")
    print(f"Type: {stream.type}")
    print(f"Quality: {stream.resolution or stream.abr}")
    print(f"Format: {stream.mime_type}")
    print(f"Size: {stream.filesize_mb:.1f}MB")
    print(f"Progressive: {stream.is_progressive}")
    print("---")

Custom Download Locations

from pytube import YouTube
import os

yt = YouTube('https://www.youtube.com/watch?v=9bZkp7q19f0')

# Download to specific directory with custom filename
output_dir = "/path/to/downloads"
custom_filename = "my_video"

stream = yt.streams.get_highest_resolution()
file_path = stream.download(
    output_path=output_dir,
    filename=custom_filename
)

print(f"Downloaded to: {file_path}")

Streaming to Buffer

from pytube import YouTube
from io import BytesIO

yt = YouTube('https://www.youtube.com/watch?v=9bZkp7q19f0')
stream = yt.streams.get_audio_only()

# Stream directly to memory buffer
buffer = BytesIO()
stream.stream_to_buffer(buffer)

# Process buffer content
buffer.seek(0)
audio_data = buffer.read()
print(f"Audio data size: {len(audio_data)} bytes")

Types

from typing import List, Optional, Union, BinaryIO, Callable, Dict, Any
from datetime import datetime

# Custom filter function type
CustomFilterFunction = Callable[[Stream], bool]

# Stream filtering parameter types
FilterValue = Union[str, int, bool, None]

Install with Tessl CLI

npx tessl i tessl/pypi-pytube

docs

captions.md

collections.md

exceptions.md

index.md

stream-management.md

video-downloads.md

tile.json