tessl/pypi-yt-dlp

A feature-rich command-line audio/video downloader forked from youtube-dl

Overview

Eval results

Files

Extractor System

Name: tessl/pypi-yt-dlp
Author: tessl

The extractor system provides discovery and management of site-specific extractors that handle URL pattern matching, metadata extraction, and format enumeration for over 1000 supported video platforms including YouTube, Vimeo, Twitch, TikTok, and many others.

Capabilities

Extractor Discovery Functions

Functions for discovering and listing available extractors in the system.

def gen_extractors():
    """
    Produce an instance of every extractor available in the system.

    Yields:
    InfoExtractor: one instance per available extractor
    """

def list_extractors(age_limit=None):
    """
    Return every available extractor as an instance, ordered by name.

    Parameters:
    - age_limit: int|None, age limit used to filter the extractors

    Returns:
    list[InfoExtractor]: extractor instances in name order
    """

def gen_extractor_classes():
    """
    Produce the class of every extractor available in the system.

    Yields:
    type[InfoExtractor]: one class per available extractor
    """

def list_extractor_classes(age_limit=None):
    """
    Return every available extractor class, ordered by name.

    Parameters:
    - age_limit: int|None, age limit used to filter the extractors

    Returns:
    list[type[InfoExtractor]]: extractor classes in name order
    """

def get_info_extractor(ie_name):
    """
    Get specific extractor class by its extractor key.

    Parameters:
    - ie_name: str, extractor key, i.e. the class name without the
      trailing "IE" (e.g. 'Youtube' for YoutubeIE); this is not the
      lowercase IE_NAME such as 'youtube'

    Returns:
    type[InfoExtractor]: extractor class

    Raises:
    AttributeError or KeyError (depending on the yt-dlp version): if no
    extractor class with that key exists
    """

Extractor Base Classes

Core extractor infrastructure providing the foundation for all site-specific extractors.

class InfoExtractor:
    """
    Base class for all information extractors.

    Provides common functionality for URL matching, information extraction,
    and format processing across all supported sites.
    """

    IE_NAME = None  # Extractor identifier
    IE_DESC = None  # Human-readable description
    _VALID_URL = None  # URL pattern regex
    _TESTS = []  # Test cases

    @classmethod
    def suitable(cls, url):
        """
        Check if URL is suitable for this extractor.

        This is a classmethod, so it can be called on an extractor class
        (such as one returned by get_info_extractor) as well as on an
        instance.

        Parameters:
        - url: str, URL to check

        Returns:
        bool: True if URL matches
        """

    def extract(self, url):
        """
        Extract information from URL.

        Parameters:
        - url: str, URL to extract from

        Returns:
        dict: extracted information
        """

    def _real_extract(self, url):
        """
        Perform actual extraction (implemented by subclasses).

        Parameters:
        - url: str, URL to extract from

        Returns:
        dict: extracted information
        """

class GenericIE(InfoExtractor):
    """
    Catch-all extractor that tries to extract from any URL.

    Serves as the fallback when no specific extractor matches the URL,
    looking for video/audio content by means of generic patterns.
    """

    IE_NAME = 'generic'
    IE_DESC = 'Generic downloader that works on many sites'

Popular Site Extractors

Key extractors for major video platforms (representative examples from 1000+ available).

class YoutubeIE(InfoExtractor):
    """
    Extractor for YouTube videos.

    Supports the various YouTube URL formats.
    """

    IE_NAME = 'youtube'
    
class VimeoIE(InfoExtractor):
    """Extractor for Vimeo videos."""

    IE_NAME = 'vimeo'
    
class TwitchVodIE(InfoExtractor):
    """Extractor for Twitch VODs (Video on Demand)."""

    IE_NAME = 'twitch:vod'
    
class TikTokIE(InfoExtractor):
    """Extractor for TikTok videos."""

    IE_NAME = 'tiktok'
    
class TwitterIE(InfoExtractor):
    """Extractor for Twitter/X videos."""

    IE_NAME = 'twitter'
    
class InstagramIE(InfoExtractor):
    """Extractor for Instagram videos."""

    IE_NAME = 'instagram'
    
class FacebookIE(InfoExtractor):
    """Extractor for Facebook videos."""

    IE_NAME = 'facebook'

Usage Examples

List Available Extractors

from yt_dlp import list_extractors

# Fetch every extractor instance and report how many there are
extractors = list_extractors()
total = len(extractors)
print(f"Total extractors: {total}")

# Show name and description for the first ten entries only
first_ten = extractors[:10]
for extractor in first_ten:
    name, description = extractor.IE_NAME, extractor.IE_DESC
    print(f"- {name}: {description}")

Check URL Compatibility

from yt_dlp import list_extractors

url = "https://www.youtube.com/watch?v=example"

# Collect the name of every extractor that reports the URL as suitable
compatible = [ie.IE_NAME for ie in list_extractors() if ie.suitable(url)]

print(f"Compatible extractors for {url}: {compatible}")

Use Specific Extractor

import yt_dlp

# Force use of specific extractor
ydl_opts = {
    'forcejson': True,  # Output JSON info
    'skip_download': True,
}

# Use a well-formed watch URL: YouTube video IDs are 11 characters long,
# and a forced extractor is still skipped when the URL does not suit it.
url = 'https://www.youtube.com/watch?v=BaW_jenozKc'

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    # ie_key is the extractor key: the class name without the "IE" suffix
    # (YoutubeIE -> 'Youtube'), not the lowercase IE_NAME ('youtube').
    info = ydl.extract_info(url, ie_key='Youtube')
    print(f"Extractor used: {info.get('extractor')}")

Get Extractor Information

from yt_dlp.extractor import get_info_extractor

# Get specific extractor class. The lookup uses the extractor key, i.e. the
# class name without the "IE" suffix (YoutubeIE -> 'Youtube'), not the
# lowercase IE_NAME.
youtube_ie = get_info_extractor('Youtube')
print(f"Name: {youtube_ie.IE_NAME}")
print(f"Description: {youtube_ie.IE_DESC}")

# Check if URL is suitable (suitable() can be called directly on the class)
url = "https://www.youtube.com/watch?v=example"
is_suitable = youtube_ie.suitable(url)
print(f"Suitable for {url}: {is_suitable}")

Filter Extractors by Age Limit

from yt_dlp import list_extractors

# Keep only extractors suitable for viewers below 18.
# An extractor is filtered out only when its content's age limit is higher
# than the limit passed in, so age_limit=18 would not exclude anything.
safe_extractors = list_extractors(age_limit=17)
all_extractors = list_extractors()

print(f"All extractors: {len(all_extractors)}")
print(f"Age-appropriate extractors: {len(safe_extractors)}")

Custom Extractor Registration

import yt_dlp
# InfoExtractor lives in the `common` module of the extractor package
from yt_dlp.extractor.common import InfoExtractor


class CustomSiteIE(InfoExtractor):
    """Example extractor for customsite.com video pages."""

    IE_NAME = 'customsite'
    IE_DESC = 'Custom site extractor'
    # The named group "id" is the value _match_id() returns
    _VALID_URL = r'https?://customsite\.com/video/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # Custom extraction logic here
        return {
            'id': video_id,
            'title': f'Video {video_id}',
            'url': f'https://customsite.com/stream/{video_id}.mp4',
        }


# Register custom extractor
with yt_dlp.YoutubeDL() as ydl:
    ydl.add_info_extractor(CustomSiteIE())
    # add_info_extractor() appends to the end of the extractor list, after the
    # built-in catch-all generic extractor, so select the custom extractor
    # explicitly by its key (class name without the "IE" suffix).
    info = ydl.extract_info(
        'https://customsite.com/video/123',
        ie_key='CustomSite',
        download=False,
    )

Supported Platforms

The extractor system supports over 1000 video platforms including:

Major Platforms

YouTube - Videos, playlists, channels, live streams
Vimeo - Videos, albums, channels, groups
Twitch - VODs, clips, live streams
TikTok - Videos, user profiles
Instagram - Videos, stories, IGTV
Twitter/X - Videos, spaces
Facebook - Videos, live streams

Educational

Coursera - Course videos and lectures
edX - Educational content
Khan Academy - Educational videos
MIT OCW - Course materials
Udemy - Course content

News and Media

BBC iPlayer - BBC content
CNN - News videos
NPR - Audio and video content
Reuters - News videos
Associated Press - News content

Entertainment

Netflix - Limited support for accessible content
Amazon Prime Video - Limited support
Hulu - Limited support
Crunchyroll - Anime content
Funimation - Anime content

Live Streaming

YouTube Live - Live streams and premieres
Twitch - Live gaming streams
Facebook Live - Live videos
Periscope - Live broadcasts
Dailymotion Live - Live content

Regional Platforms

Bilibili - Chinese video platform
Niconico - Japanese video platform
VK - Russian social network videos
Youku - Chinese video platform
Tudou - Chinese video platform

And hundreds more platforms across different regions and specialties.

Types

# Base extractor type
# NOTE(review): this binds the name to the builtin `type`; presumably a
# placeholder standing in for the InfoExtractor base class — confirm.
InfoExtractor = type

# Extractor result information dictionary (string keys, values of any type)
ExtractorResult = dict[str, Any]

# URL pattern matching result: a regex match object over str, or None
URLMatch = re.Match[str] | None

Install with Tessl CLI