tessl/pypi-pytube

Python library for downloading YouTube videos with comprehensive stream management and metadata extraction capabilities.

—

Pending

Overview

Eval results

Files

Video Downloads and Metadata

Name: tessl/pypi-pytube
Author: tessl

Core functionality for downloading individual YouTube videos and extracting comprehensive metadata including titles, descriptions, view counts, thumbnails, and publication information.

Capabilities

YouTube Class

Primary interface for single-video operations, providing access to video metadata, stream collections, and download capabilities.

class YouTube:
    """Interface for a single YouTube video: metadata, streams, and downloads."""

    def __init__(
        self,
        url: str,
        on_progress_callback: Optional[Callable[[Any, bytes, int], None]] = None,
        on_complete_callback: Optional[Callable[[Any, Optional[str]], None]] = None,
        proxies: Optional[Dict[str, str]] = None,
        use_oauth: bool = False,
        allow_oauth_cache: bool = True
    ) -> None:
        """
        Construct a YouTube object for a single video.

        Args:
            url (str): A valid YouTube watch URL
            on_progress_callback (callable, optional): User defined callback function for stream
                download progress events; called with the stream, the chunk, and the bytes remaining
            on_complete_callback (callable, optional): User defined callback function for stream
                download complete events; called with the stream and the file path
            proxies (dict, optional): A dict mapping protocol to proxy address which will be used
                by pytube. Defaults to None, so the annotation is Optional
            use_oauth (bool, optional): Prompt the user to authenticate to YouTube
            allow_oauth_cache (bool, optional): Cache OAuth tokens locally on the machine
        """

Video Identification

Access video identification and URL properties.

@property
def video_id(self) -> str:
    """
    Get the video ID extracted from the URL.

    Returns:
        str: The video ID
    """

@property  
def watch_url(self) -> str:
    """
    Get the full YouTube watch URL.

    Returns:
        str: The watch URL
    """

@property
def embed_url(self) -> str:
    """
    Get the YouTube embed URL.

    Returns:
        str: The embed URL
    """

@property
def thumbnail_url(self) -> str:
    """
    Get the video thumbnail URL.

    Returns:
        str: The thumbnail URL
    """

Video Metadata

Extract comprehensive video metadata including title, description, author, and publication details.

@property
def title(self) -> str:
    """
    Get the video title.

    Returns:
        str: The video title
    """

@title.setter  
def title(self, value: str):
    """
    Set the video title.

    Args:
        value (str): The new title
    """

@property
def description(self) -> str:
    """
    Get the video description.

    Returns:
        str: The video description
    """

@property
def author(self) -> str:
    """
    Get the video author/channel name.

    Returns:
        str: The author/channel name
    """

@author.setter
def author(self, value: str):
    """
    Set the video author.

    Args:
        value (str): The new author
    """

@property
def publish_date(self) -> datetime:
    """
    Get the video publish date.

    Returns:
        datetime: The publish date
    """

@publish_date.setter
def publish_date(self, value: datetime):
    """
    Set the video publish date.

    Args:
        value (datetime): The new publish date
    """

@property
def keywords(self) -> List[str]:
    """
    Get the video keywords/tags.

    Returns:
        List[str]: The keywords/tags
    """

Video Statistics

Access view counts, ratings, and duration information.

@property
def views(self) -> int:
    """
    Get the number of times the video has been viewed.

    Returns:
        int: The view count
    """

@property
def rating(self) -> float:
    """
    Get the video average rating.

    Returns:
        float: The average rating
    """

@property
def length(self) -> int:
    """
    Get the video length in seconds.

    Returns:
        int: The length in seconds
    """

Channel Information

Extract information about the video's channel.

@property
def channel_id(self) -> str:
    """
    Get the video poster's channel ID.

    Returns:
        str: The channel ID
    """

@property
def channel_url(self) -> str:
    """
    Get the channel URL for the video's poster.

    Returns:
        str: The channel URL
    """

Stream and Caption Access

Access downloadable streams and caption tracks.

@property
def streams(self) -> StreamQuery:
    """
    Interface to query both adaptive (DASH) and progressive streams.

    Returns:
        StreamQuery: Query interface over the streams
    """

@property
def captions(self) -> CaptionQuery:
    """
    Interface to query caption tracks.

    Returns:
        CaptionQuery: Query interface over the caption tracks
    """

@property
def caption_tracks(self) -> List[Caption]:
    """
    Get a list of Caption objects.

    Returns:
        List[Caption]: The Caption objects
    """

Availability Checking

Verify video availability and handle various restriction scenarios.

def check_availability(self) -> None:
    """
    Check whether the video is available.
    
    Raises different exceptions based on why the video is unavailable,
    otherwise does nothing.

    Returns:
        None
    """

@property
def age_restricted(self) -> bool:
    """
    Check if the video is age restricted.

    Returns:
        bool: Whether the video is age restricted
    """

def bypass_age_gate(self) -> None:
    """
    Attempt to update the vid_info by bypassing the age gate.

    Returns:
        None
    """

Callback Management

Manage download progress and completion callbacks.

def register_on_progress_callback(self, func: Callable[[Any, bytes, int], None]) -> None:
    """
    Register a download progress callback function post initialization.
    
    Args:
        func (callable): A callback function that takes stream, chunk, and bytes_remaining as parameters

    Returns:
        None
    """

def register_on_complete_callback(self, func: Callable[[Any, Optional[str]], None]) -> None:
    """
    Register a download complete callback function post initialization.
    
    Args:
        func (callable): A callback function that takes stream and file_path

    Returns:
        None
    """

Static Methods

Create YouTube objects from video IDs.

@staticmethod
def from_id(video_id: str) -> "YouTube":
    """
    Construct a YouTube object from a video ID.

    The return annotation is a quoted forward reference: inside the class
    body the name YouTube is not bound yet when this signature is evaluated.
    
    Args:
        video_id (str): The video ID of the YouTube video
        
    Returns:
        YouTube: YouTube object for the specified video
    """

Extended Metadata

Access additional video metadata and technical information.

@property
def metadata(self) -> Optional[YouTubeMetadata]:
    """
    Get the metadata for the video.

    Returns:
        Optional[YouTubeMetadata]: The video metadata, or None
    """

@property
def watch_html(self) -> str:
    """
    Get the HTML content of the watch page.

    Returns:
        str: The watch page HTML
    """

@property
def vid_info(self) -> Dict:
    """
    Parse the raw vid info and return the parsed result.

    Returns:
        Dict: The parsed vid info
    """

@property
def initial_data(self) -> Dict:
    """
    Get the initial data from the watch page.

    Returns:
        Dict: The initial data
    """

@property
def embed_html(self) -> str:
    """
    Get the HTML content of the embed page.

    Returns:
        str: The embed page HTML
    """

@property
def js_url(self) -> str:
    """
    Get the URL to the JavaScript file containing signature decryption functions.

    Returns:
        str: The JavaScript file URL
    """

@property
def js(self) -> str:
    """
    Get the JavaScript content for signature decryption.

    Returns:
        str: The JavaScript content
    """

@property
def streaming_data(self) -> Dict:
    """
    Get the streamingData from video info, bypassing age gate if necessary.

    Returns:
        Dict: The streamingData
    """

@property
def fmt_streams(self) -> List[Stream]:
    """
    Get the list of initialized Stream objects if they have been processed.

    Returns:
        List[Stream]: The initialized Stream objects
    """

Usage Examples

Basic Video Download

from pytube import YouTube

# Create a YouTube object for the video at this watch URL
yt = YouTube('https://www.youtube.com/watch?v=9bZkp7q19f0')

# Print basic information: title, author, length in seconds,
# and view count (the ":," format spec adds thousands separators)
print(f"Title: {yt.title}")
print(f"Author: {yt.author}")
print(f"Duration: {yt.length} seconds")
print(f"Views: {yt.views:,}")

# Select the highest quality progressive stream, then download it
stream = yt.streams.get_highest_resolution()
stream.download()

Progress Tracking

from pytube import YouTube

def progress_callback(stream, chunk, bytes_remaining):
    """
    Print the download progress as a percentage.

    Args:
        stream: The stream being downloaded; its ``filesize`` is the total size
        chunk: The chunk that was just received (unused here)
        bytes_remaining (int): Number of bytes still to be downloaded
    """
    total_size = stream.filesize
    if total_size > 0:
        bytes_downloaded = total_size - bytes_remaining
        percentage = bytes_downloaded / total_size * 100
    else:
        # An empty stream has nothing left to fetch; report it as complete
        # instead of dividing by zero.
        percentage = 100.0
    print(f"Download progress: {percentage:.1f}%")

def complete_callback(stream, file_path):
    """
    Announce that a download has finished.

    Args:
        stream: The stream that was downloaded (unused here)
        file_path: Path of the saved file, included in the printed message
    """
    message = f"Download completed! File saved to: {file_path}"
    print(message)

# Create a YouTube object, passing the progress and completion callbacks
# defined above to the constructor
yt = YouTube(
    'https://www.youtube.com/watch?v=9bZkp7q19f0',
    on_progress_callback=progress_callback,
    on_complete_callback=complete_callback
)

# Download the highest-resolution stream; the registered callbacks handle
# the download progress and download complete events
stream = yt.streams.get_highest_resolution()
stream.download()

OAuth Authentication

from pytube import YouTube

# Use OAuth for accessing private or restricted content.
# use_oauth=True prompts the user to authenticate to YouTube;
# allow_oauth_cache=True caches the OAuth tokens locally on the machine.
# NOTE(review): PRIVATE_VIDEO_ID looks like a placeholder - substitute a real video ID.
yt = YouTube(
    'https://www.youtube.com/watch?v=PRIVATE_VIDEO_ID',
    use_oauth=True,
    allow_oauth_cache=True
)

# Download after authentication
stream = yt.streams.get_highest_resolution()
stream.download()

Proxy Configuration

from pytube import YouTube

# Configure proxy settings: a dict mapping protocol to proxy address
proxies = {
    'http': 'http://proxy.example.com:8080',
    'https': 'https://proxy.example.com:8080'
}

# Pass the proxy mapping to the constructor, then download the
# highest-resolution stream
yt = YouTube('https://www.youtube.com/watch?v=9bZkp7q19f0', proxies=proxies)
stream = yt.streams.get_highest_resolution()
stream.download()

Types

class YouTubeMetadata:
    """
    Extended metadata information for YouTube videos.
    
    Parses and organizes structured metadata from YouTube video pages,
    including video categories, tags, descriptions, and other detailed information.

    Sections can be read by integer index or by iterating over the object;
    converting it to a string gives a JSON representation.
    """
    
    def __init__(self, metadata: List):
        """
        Initialize YouTubeMetadata with raw metadata list.
        
        Args:
            metadata (List): Raw metadata list from YouTube page
        """
    
    def __getitem__(self, key: int) -> Dict:
        """
        Get metadata section by index.
        
        Args:
            key (int): Index of metadata section
            
        Returns:
            Dict: Metadata section dictionary
        """
    
    def __iter__(self) -> Iterator[Dict]:
        """
        Iterate through metadata sections.
        
        Returns:
            Iterator[Dict]: Iterator over metadata section dictionaries
        """
    
    def __str__(self) -> str:
        """
        Get JSON string representation of metadata.
        
        Returns:
            str: JSON formatted metadata string
        """
    
    @property
    def raw_metadata(self) -> Optional[List]:
        """
        Get the raw unprocessed metadata list.

        Returns:
            Optional[List]: The raw metadata list, or None
        """
    
    @property
    def metadata(self) -> List[Dict]:
        """
        Get the processed metadata as a list of dictionaries.

        Returns:
            List[Dict]: The processed metadata sections
        """

# Callback types
# ProgressCallback: download progress callback, called with
# (stream, chunk, bytes_remaining).
ProgressCallback = Callable[[Any, bytes, int], None]
# CompleteCallback: download complete callback, called with
# (stream, file_path); file_path is typed Optional[str].
CompleteCallback = Callable[[Any, Optional[str]], None]

Install with Tessl CLI