tessl/pypi-pytube

Python library for downloading YouTube videos with comprehensive stream management and metadata extraction capabilities.

—

Pending

Overview

Eval results

Files

Collection Operations

Name: tessl/pypi-pytube
Author: tessl

Playlist, channel, and search functionality for working with multiple YouTube videos, including bulk downloads and metadata extraction from video collections.

Capabilities

Playlist Class

Handle YouTube playlist operations for downloading and extracting metadata from multiple videos in a playlist.

class Playlist:
    """Handle a YouTube playlist: download and extract metadata for its videos."""

    def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
        """
        Initialize a Playlist object.

        Args:
            url (str): YouTube playlist URL
            proxies (dict, optional): HTTP proxy configuration as a
                str-to-str mapping; defaults to None
        """

Playlist Identification

Access playlist identification and URL properties.

# Playlist members: shown at module level in this reference, defined on the class.
@property
def playlist_id(self) -> str:
    """Return the unique identifier of this playlist as a string."""

@property
def playlist_url(self) -> str:
    """Return the full URL of this playlist as a string."""

Playlist Content Access

Access videos and URLs within the playlist.

# Playlist members: shown at module level in this reference, defined on the class.
@property
def video_urls(self) -> DeferredGeneratorList:
    """
    Get the video URLs in the playlist.

    Returns:
        DeferredGeneratorList: Lazy-loaded list of video URLs
    """

@property
def videos(self) -> Iterable[YouTube]:
    """
    Get YouTube objects for each video in the playlist.

    Returns:
        Iterable[YouTube]: Iterable of YouTube objects, one per video
    """

def videos_generator(self) -> Iterator[YouTube]:
    """
    Generator function that yields YouTube objects for playlist videos.

    Unlike the ``videos`` property this is a method and must be called.

    Returns:
        Iterator[YouTube]: Generator yielding YouTube objects
    """

def url_generator(self) -> Iterator[str]:
    """
    Generator function that yields video URLs from the playlist.

    Unlike the ``video_urls`` property this is a method and must be called.

    Returns:
        Iterator[str]: Generator yielding video URLs
    """

Playlist Metadata

Extract playlist information and statistics.

# Playlist members: shown at module level in this reference, defined on the class.
@property
def title(self) -> Optional[str]:
    """Get the playlist title; the annotation allows None, so callers should check."""

@property
def description(self) -> str:
    """Get the playlist description."""

@property
def length(self) -> int:
    """Get the number of videos in the playlist."""

@property
def views(self) -> int:
    """Get the total playlist view count."""

@property
def last_updated(self) -> Optional[date]:
    """Get the last update date of the playlist; the annotation allows None."""

Playlist Owner Information

Access information about the playlist creator.

# Playlist members: shown at module level in this reference, defined on the class.
@property
def owner(self) -> str:
    """Get the name of the playlist's owner."""

@property
def owner_id(self) -> str:
    """Get the channel ID of the playlist's owner."""

@property
def owner_url(self) -> str:
    """Get the channel URL of the playlist's owner."""

Playlist Navigation

Navigate and slice through playlist content.

# Playlist members: shown at module level in this reference, defined on the class.
def trimmed(self, video_id: str) -> Iterable[str]:
    """
    Get video URLs starting from a specific video ID.

    NOTE(review): upstream pytube describes ``trimmed`` as yielding the URLs
    that come *before* ``video_id`` (the list is cut at that ID), which is
    the opposite of "from the specified point onward" - confirm against the
    installed pytube and correct this summary accordingly.

    Args:
        video_id (str): Video ID to start from

    Returns:
        Iterable[str]: Video URLs from the specified point onward
    """

def __getitem__(self, i: Union[slice, int]) -> Union[str, List[str]]:
    """
    Get video URL(s) by index or slice.

    Args:
        i (int or slice): Index or slice object

    Returns:
        str or List[str]: A single URL for an int index, a list of URLs
            for a slice
    """

def __len__(self) -> int:
    """
    Get the number of videos in the playlist, enabling ``len(playlist)``.

    Returns:
        int: Number of videos
    """

Playlist Technical Properties

Access technical information about the playlist.

# Playlist members: shown at module level in this reference, defined on the class.
@property
def html(self) -> str:
    """Get the raw HTML content of the playlist page."""

@property
def ytcfg(self) -> dict:
    """Get the YouTube configuration data as a dict."""

@property
def initial_data(self) -> dict:
    """Get the initial data from the playlist page as a dict."""

@property
def sidebar_info(self) -> dict:
    """Get sidebar information from the playlist page as a dict."""

@property
def yt_api_key(self) -> str:
    """Get the YouTube API key extracted from the page."""

Channel Class

Handle YouTube channel operations, providing access to all videos uploaded by a specific channel.

class Channel(Playlist):
    """Handle a YouTube channel, giving access to the videos it has uploaded."""

    def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
        """
        Initialize a Channel object (inherits from Playlist).

        Args:
            url (str): YouTube channel URL
            proxies (dict, optional): HTTP proxy configuration as a
                str-to-str mapping; defaults to None
        """

Channel Identification

Access channel-specific identification properties.

# Channel members: shown at module level in this reference, defined on the class.
@property
def channel_name(self) -> str:
    """Get the channel display name."""

@property
def channel_id(self) -> str:
    """Get the unique channel identifier."""

@property
def channel_uri(self) -> str:
    """Get the channel URI."""

@property
def vanity_url(self) -> Optional[str]:
    """Get the custom channel URL, or None if the channel has none."""

Channel URLs

Access different channel page URLs.

# Channel members: shown at module level in this reference, defined on the class.
@property
def channel_url(self) -> str:
    """Get the main channel URL."""

@property
def videos_url(self) -> str:
    """Get the URL of the channel's videos page."""

@property
def playlists_url(self) -> str:
    """Get the URL of the channel's playlists page."""

@property
def community_url(self) -> str:
    """Get the URL of the channel's community page."""

@property
def featured_channels_url(self) -> str:
    """Get the URL of the channel's featured channels page."""

@property
def about_url(self) -> str:
    """Get the URL of the channel's about page."""

Channel Page Content

Access HTML content from different channel pages.

# Channel members: shown at module level in this reference, defined on the class.
@property
def html(self) -> str:
    """Get the main channel page HTML content.

    Channel derives from Playlist, which also declares ``html``; this
    declaration takes precedence on Channel objects.
    """

@property
def playlists_html(self) -> str:
    """Get the HTML content of the playlists page."""

@property
def community_html(self) -> str:
    """Get the HTML content of the community page."""

@property
def featured_channels_html(self) -> str:
    """Get the HTML content of the featured channels page."""

@property
def about_html(self) -> str:
    """Get the HTML content of the about page."""

Search Class

Search YouTube videos with query-based video discovery and result pagination.

class Search:
    """Search YouTube videos by query, with paginated results."""

    def __init__(self, query: str):
        """
        Initialize a Search object.

        Args:
            query (str): Search query string
        """

Search Properties

Access search query information and results.

# Search members: shown at module level in this reference, defined on the class.
@property
def query(self) -> str:
    """Get the current search query."""

@property
def results(self) -> list:
    """
    Get the list of search result YouTube objects.

    Returns:
        list: YouTube objects from the search results loaded so far
    """

@property
def completion_suggestions(self) -> list:
    """
    Get query auto-completion suggestions.

    Returns:
        list: Suggested search terms
    """

Search Operations

Manage search results and pagination.

# Search members: shown at module level in this reference, defined on the class.
def get_next_results(self) -> None:
    """
    Load additional search results (pagination).

    Appends new results to the existing results list; nothing is returned.

    NOTE(review): upstream pytube appears to raise IndexError when no
    continuation token is left - confirm, and handle it in callers that
    page until the results are exhausted.
    """

def fetch_and_parse(self, continuation: Optional[str] = None) -> Tuple[List[YouTube], str]:
    """
    Fetch and parse a page of search results.

    Args:
        continuation (str, optional): Continuation token for pagination;
            defaults to None

    Returns:
        Tuple[List[YouTube], str]: Tuple of (YouTube objects, next continuation token)
    """

def fetch_query(self, continuation: Optional[str] = None) -> dict:
    """
    Fetch raw search query results from YouTube.

    Args:
        continuation (str, optional): Continuation token for pagination;
            defaults to None

    Returns:
        dict: Raw search result data
    """

Usage Examples

Basic Playlist Download

from pytube import Playlist

# Initialize playlist
playlist = Playlist('https://www.youtube.com/playlist?list=PLrqL3dXNe2Kej3sR4lZ7O2JFXOzd5Y0yV')

# Get playlist information
print(f"Playlist: {playlist.title}")
print(f"Owner: {playlist.owner}")
print(f"Videos: {playlist.length}")

# Download all videos
for video in playlist.videos:
    try:
        print(f"Downloading: {video.title}")
        stream = video.streams.get_highest_resolution()
        if stream is None:
            # No suitable stream: report it instead of failing on None.download().
            print(f"No downloadable stream for {video.watch_url}")
            continue
        stream.download()
    except Exception as e:
        # Identify the video by URL rather than title: reading `video.title`
        # is the first thing the try body does, so if that was what failed,
        # reading it again here would raise a second, unhandled exception
        # and abort the whole loop.
        print(f"Failed to download {video.watch_url}: {e}")

Selective Playlist Processing

from pytube import Playlist

playlist = Playlist('https://www.youtube.com/playlist?list=PLrqL3dXNe2Kej3sR4lZ7O2JFXOzd5Y0yV')

# Report on the first five entries only, counting positions from 1
for position, video in enumerate(playlist.videos, start=1):
    if position > 5:
        break

    print(f"Video {position}: {video.title}")
    print(f"Duration: {video.length} seconds")
    print(f"Views: {video.views:,}")
    print("---")

# Alternative: slice the URL list to get just the first five URLs
first_five_urls = playlist.video_urls[:5]
print(f"First 5 URLs: {first_five_urls}")

Channel Video Discovery

from pytube import Channel

# Initialize channel
channel = Channel('https://www.youtube.com/c/PythonProgramming')

print(f"Channel: {channel.channel_name}")
print(f"Channel ID: {channel.channel_id}")
print(f"Total videos: {channel.length}")

# Get recent videos (first 10)
recent_videos = []
for i, video in enumerate(channel.videos):
    if i >= 10:
        break
    recent_videos.append({
        'title': video.title,
        'views': video.views,
        'duration': video.length,
        'publish_date': video.publish_date
    })

# Sort by publish date, newest first. A video's publish date can be missing
# (None), and None cannot be ordered against a date, so undated entries are
# set aside and listed after the dated ones instead of crashing the sort.
dated = [v for v in recent_videos if v['publish_date'] is not None]
undated = [v for v in recent_videos if v['publish_date'] is None]
dated.sort(key=lambda v: v['publish_date'], reverse=True)
recent_videos = dated + undated

for video in recent_videos:
    print(f"{video['title']} - {video['views']:,} views")

Search and Filter Videos

from pytube import Search

# Run the search
search = Search('python programming tutorial')

print(f"Search query: '{search.query}'")
print("Completion suggestions:", search.completion_suggestions[:5])

# Show what the first page returned
print(f"Found {len(search.results)} initial results")

for video in search.results[:5]:
    # Split the length in seconds into whole minutes and leftover seconds
    minutes, seconds = divmod(video.length, 60)
    print(f"Title: {video.title}")
    print(f"Author: {video.author}")
    print(f"Duration: {minutes}:{seconds:02d}")
    print(f"Views: {video.views:,}")
    print("---")

# Fetch the next page; it is appended to search.results
search.get_next_results()
print(f"Total results after loading more: {len(search.results)}")

Bulk Download with Progress Tracking

from pytube import Playlist
import os
from datetime import datetime

def _sanitize_name(text):
    """Reduce text to alphanumerics, spaces, '-' and '_', dropping trailing whitespace."""
    return "".join(c for c in text if c.isalnum() or c in (' ', '-', '_')).rstrip()


def download_playlist_with_progress(playlist_url, download_dir):
    """Download entire playlist with progress tracking.

    Args:
        playlist_url (str): URL of the playlist to download.
        download_dir (str): Parent directory; a sub-folder named after the
            playlist is created inside it.

    Returns:
        None. Progress lines and a final success/failure summary are printed.
    """
    playlist = Playlist(playlist_url)

    # Create download directory. The title may be None, and a title made up
    # only of stripped characters sanitizes to "", so fall back to the
    # playlist ID rather than crashing or writing straight into download_dir.
    safe_title = _sanitize_name(playlist.title or "") or playlist.playlist_id
    full_download_dir = os.path.join(download_dir, safe_title)
    os.makedirs(full_download_dir, exist_ok=True)

    # Read the playlist size once instead of on every loop iteration.
    total = playlist.length

    print(f"Downloading playlist: {playlist.title}")
    print(f"Total videos: {total}")
    print(f"Download directory: {full_download_dir}")

    successful_downloads = 0
    failed_downloads = 0

    for i, video in enumerate(playlist.videos, 1):
        try:
            print(f"[{i}/{total}] {video.title}")

            # Get best available stream, falling back to the first one listed
            stream = video.streams.get_highest_resolution()
            if not stream:
                stream = video.streams.first()
            if not stream:
                # Fail with a readable reason instead of an AttributeError on None.
                raise RuntimeError("no downloadable stream available")

            # Download with safe filename.
            # NOTE(review): recent pytube releases appear to use `filename`
            # verbatim, without appending the media extension - confirm
            # against the installed version and add the extension if needed.
            safe_filename = _sanitize_name(video.title)
            file_path = stream.download(
                output_path=full_download_dir,
                filename=safe_filename
            )

            successful_downloads += 1
            print(f"✓ Downloaded: {os.path.basename(file_path)}")

        except Exception as e:
            # Best effort: one failed video must not stop the rest.
            failed_downloads += 1
            print(f"✗ Failed: {e}")

    print("\nDownload completed!")
    print(f"Successful: {successful_downloads}")
    print(f"Failed: {failed_downloads}")

# Usage
download_playlist_with_progress(
    'https://www.youtube.com/playlist?list=PLrqL3dXNe2Kej3sR4lZ7O2JFXOzd5Y0yV',
    './downloads'
)

Advanced Collection Filtering

from pytube import Channel
from datetime import datetime, timedelta

def get_recent_channel_videos(channel_url, days=30, min_views=1000):
    """Collect a channel's recent videos that reach a minimum view count.

    Args:
        channel_url: URL of the channel to scan.
        days: Only videos published within this many days are kept.
        min_views: Only videos with at least this many views are kept.

    Returns:
        A list of dicts (title, url, views, duration, publish_date),
        ordered from most to least viewed.
    """
    channel = Channel(channel_url)
    cutoff_date = datetime.now() - timedelta(days=days)

    matches = []

    for video in channel.videos:
        published = video.publish_date

        # Skip videos with no publish date or one at/before the cutoff
        if not published or published <= cutoff_date:
            continue

        # Skip videos below the view threshold
        if video.views < min_views:
            continue

        matches.append({
            'title': video.title,
            'url': video.watch_url,
            'views': video.views,
            'duration': video.length,
            'publish_date': published
        })

    # Most viewed first
    return sorted(matches, key=lambda entry: entry['views'], reverse=True)

# Usage
popular_recent = get_recent_channel_videos(
    'https://www.youtube.com/c/PythonProgramming',
    days=60,
    min_views=5000
)

print(f"Found {len(popular_recent)} popular recent videos:")
for entry in popular_recent[:10]:
    print(f"{entry['title']} - {entry['views']:,} views")

Search with Custom Filters

from pytube import Search

def search_videos_with_duration_filter(query, min_duration=300, max_duration=1800,
                                       max_results=10, max_process=50):
    """Search for videos within a specific duration range.

    Args:
        query (str): Search query string.
        min_duration (int): Shortest accepted video length, in seconds.
        max_duration (int): Longest accepted video length, in seconds.
        max_results (int): Stop once this many matching videos are found.
        max_process (int): Upper bound on how many search results are
            inspected, to avoid excessive requests.

    Returns:
        list: Dicts (title, author, duration, views, url) for the matching
            videos, in search-result order.
    """
    search = Search(query)

    filtered_videos = []
    processed = 0

    # One result is inspected per iteration, so both limits are enforced
    # exactly, even in the middle of a page of results.
    while len(filtered_videos) < max_results and processed < max_process:
        available = len(search.results)

        if processed >= available:
            # Everything loaded so far has been inspected: fetch another page.
            try:
                search.get_next_results()
            except IndexError:
                # Upstream pytube raises IndexError once no continuation
                # token is left, i.e. there are no further pages.
                break
            if len(search.results) <= available:
                # Nothing new was appended; stop instead of looping forever.
                break
            continue

        video = search.results[processed]
        processed += 1

        # Check duration constraints
        if min_duration <= video.length <= max_duration:
            filtered_videos.append({
                'title': video.title,
                'author': video.author,
                'duration': video.length,
                'views': video.views,
                'url': video.watch_url
            })

    return filtered_videos

# Usage: Find 5-30 minute Python tutorials
medium_tutorials = search_videos_with_duration_filter(
    'python tutorial beginner',
    min_duration=300,   # 5 minutes
    max_duration=1800   # 30 minutes
)

for video in medium_tutorials:
    duration_str = f"{video['duration'] // 60}:{video['duration'] % 60:02d}"
    print(f"{video['title']} ({duration_str}) - {video['views']:,} views")

Types

from datetime import date
from typing import (
    Any,
    Callable,
    Dict,
    Iterable,
    Iterator,
    List,
    Optional,
    Tuple,
    Union,
)

# Deferred generator list for lazy loading
class DeferredGeneratorList:
    """
    Lazy-loaded list that generates items on demand.

    A wrapper class for deferring list generation to improve performance
    when working with large collections like playlists and channels.
    Items are only fetched and processed when accessed, preventing
    unnecessary web requests and memory usage.

    ``len()``, ``repr()`` and ``reversed()`` need every item, so using them
    generates the whole collection; prefer indexing, slicing or iteration
    when only part of it is needed.
    """

    def __init__(self, generator):
        """
        Initialize DeferredGeneratorList with a generator.

        Args:
            generator: The deferrable generator to create a wrapper for
        """

    def __getitem__(self, key: Union[int, slice]) -> Any:
        """
        Get item(s) by index or slice, generating only as needed.

        Args:
            key (int or slice): Index or slice object

        Returns:
            Any: Item(s) at specified index/slice

        Raises:
            TypeError: If key is not int or slice
            IndexError: If index is out of range
        """

    def __iter__(self) -> Iterator[Any]:
        """
        Iterate through items, generating dynamically.

        Returns:
            Iterator[Any]: Iterator over generated items
        """

    def __len__(self) -> int:
        """
        Get total length by generating all items.

        Because every item has to be generated first, this gives up the
        benefit of lazy loading.

        Returns:
            int: Total number of items in the collection
        """

    def __repr__(self) -> str:
        """
        String representation of all items.

        Covers the complete list, so every item is generated first.

        Returns:
            str: String representation of the complete list
        """

    def __reversed__(self) -> List[Any]:
        """
        Get reversed list of all items.

        Covers all items, so every item is generated first.

        Returns:
            List[Any]: Reversed list of all items
        """

    def generate_all(self) -> None:
        """
        Force generation of all items in the collection.

        Useful when you need to access the complete list immediately
        rather than generating items on-demand. Nothing is returned.
        """

# Collection filtering types
# A predicate over one video: it takes a YouTube object and returns a bool.
CollectionFilter = Callable[[YouTube], bool]

Install with Tessl CLI