A feature-rich command-line audio/video downloader forked from youtube-dl
The extractor system provides discovery and management of site-specific extractors that handle URL pattern matching, metadata extraction, and format enumeration for over 1000 supported video platforms including YouTube, Vimeo, Twitch, TikTok, and many others.
Functions for discovering and listing available extractors in the system.
def gen_extractors():
    """
    Generate all available extractor instances.

    Takes no arguments.

    Yields:
    InfoExtractor: extractor instances, one per available extractor
    """
def list_extractors(age_limit=None):
    """
    Get list of all available extractor instances, sorted by name.

    Parameters:
    - age_limit: int|None, filter by age limit (defaults to None)

    Returns:
    list[InfoExtractor]: sorted list of extractor instances
    """
def gen_extractor_classes():
    """
    Generate all available extractor classes.

    Takes no arguments. Produces the classes themselves, not instances.

    Yields:
    type[InfoExtractor]: extractor classes
    """
def list_extractor_classes(age_limit=None):
    """
    Get list of all available extractor classes, sorted by name.

    Parameters:
    - age_limit: int|None, filter by age limit (defaults to None)

    Returns:
    list[type[InfoExtractor]]: sorted list of extractor classes
    """
def get_info_extractor(ie_name):
    """
    Get specific extractor class by name.

    Parameters:
    - ie_name: str, extractor name/key

    Returns:
    type[InfoExtractor]: extractor class (not an instance)

    Raises:
    ValueError: if extractor not found

    NOTE(review): upstream yt-dlp appears to look up `<ie_name>IE`, so the
    expected key is presumably the class name without the "IE" suffix
    (e.g. 'Youtube'), and a miss may surface as KeyError rather than
    ValueError -- confirm against the installed version.
    """

Core extractor infrastructure providing the foundation for all site-specific extractors.
class InfoExtractor:
    """
    Base class for all information extractors.

    Provides common functionality for URL matching, information extraction,
    and format processing across all supported sites.
    """

    IE_NAME = None  # Extractor identifier
    IE_DESC = None  # Human-readable description
    _VALID_URL = None  # URL pattern regex
    _TESTS = []  # Test cases

    # Declared as a classmethod so it can be called on an extractor class
    # (as returned by get_info_extractor) as well as on an instance.
    @classmethod
    def suitable(cls, url):
        """
        Check if URL is suitable for this extractor.

        Parameters:
        - url: str, URL to check

        Returns:
        bool: True if URL matches
        """

    def extract(self, url):
        """
        Extract information from URL.

        Parameters:
        - url: str, URL to extract from

        Returns:
        dict: extracted information
        """

    def _real_extract(self, url):
        """
        Perform actual extraction (implemented by subclasses).

        Parameters:
        - url: str, URL to extract from

        Returns:
        dict: extracted information
        """
class GenericIE(InfoExtractor):
    """
    Generic extractor that attempts to extract from any URL.

    Used as a fallback when no specific extractor matches the URL.
    Attempts to find video/audio content using generic patterns.
    """

    IE_NAME = 'generic'  # Extractor identifier
    IE_DESC = 'Generic downloader that works on many sites'  # Human-readable description

Key extractors for major video platforms (representative examples from 1000+ available).
class YoutubeIE(InfoExtractor):
    """YouTube video extractor supporting various YouTube URL formats."""

    IE_NAME = 'youtube'  # Extractor identifier


class VimeoIE(InfoExtractor):
    """Vimeo video extractor."""

    IE_NAME = 'vimeo'  # Extractor identifier


class TwitchVodIE(InfoExtractor):
    """Twitch VOD (Video on Demand) extractor."""

    IE_NAME = 'twitch:vod'  # Extractor identifier


class TikTokIE(InfoExtractor):
    """TikTok video extractor."""

    IE_NAME = 'tiktok'  # Extractor identifier


class TwitterIE(InfoExtractor):
    """Twitter/X video extractor."""

    IE_NAME = 'twitter'  # Extractor identifier


class InstagramIE(InfoExtractor):
    """Instagram video extractor."""

    IE_NAME = 'instagram'  # Extractor identifier


class FacebookIE(InfoExtractor):
    """Facebook video extractor."""

    IE_NAME = 'facebook'  # Extractor identifier


from yt_dlp import list_extractors
# Get all extractors (a list of extractor instances)
extractors = list_extractors()
print(f"Total extractors: {len(extractors)}")

# Print the name and description of the first 10 extractors
for ie in extractors[:10]:
    print(f"- {ie.IE_NAME}: {ie.IE_DESC}")


from yt_dlp import list_extractors
url = "https://www.youtube.com/watch?v=example"

# Collect the IE_NAME of every extractor that reports the URL as suitable
compatible = [ie.IE_NAME for ie in list_extractors() if ie.suitable(url)]
print(f"Compatible extractors for {url}: {compatible}")


import yt_dlp
# Metadata-only run: print the info dict as JSON and skip the media download
ydl_opts = {
    'forcejson': True,  # Output JSON info
    'skip_download': True,
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    # Force use of a specific extractor. `ie_key` is the extractor key, i.e.
    # the class name without the "IE" suffix (YoutubeIE -> 'Youtube'), not
    # the lowercase IE_NAME; an unknown key leaves no extractor to try.
    info = ydl.extract_info(
        'https://www.youtube.com/watch?v=example',
        ie_key='Youtube',
    )
    print(f"Extractor used: {info.get('extractor')}")


from yt_dlp.extractor import get_info_extractor
# Get specific extractor class. The lookup key is the class name without the
# "IE" suffix (YoutubeIE -> 'Youtube'), not the lowercase IE_NAME.
youtube_ie = get_info_extractor('Youtube')
print(f"Name: {youtube_ie.IE_NAME}")
print(f"Description: {youtube_ie.IE_DESC}")

# Check if URL is suitable (called on the class itself; no instance needed)
url = "https://www.youtube.com/watch?v=example"
is_suitable = youtube_ie.suitable(url)
print(f"Suitable for {url}: {is_suitable}")


from yt_dlp import list_extractors
# Get extractors that respect age limits
# NOTE(review): with age_limit=18 the filter presumably excludes nothing, so
# both counts are likely equal; try a lower value to see a difference -- confirm.
safe_extractors = list_extractors(age_limit=18)

# Unfiltered list, for comparison
all_extractors = list_extractors()

print(f"All extractors: {len(all_extractors)}")
print(f"Age-appropriate extractors: {len(safe_extractors)}")


import yt_dlp
# Import the base class from the module that defines it
from yt_dlp.extractor.common import InfoExtractor


class CustomSiteIE(InfoExtractor):
    """Example extractor for customsite.com video pages."""

    IE_NAME = 'customsite'
    IE_DESC = 'Custom site extractor'
    # The named group `id` is what _match_id() returns
    _VALID_URL = r'https?://customsite\.com/video/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        """
        Build the info dict for a customsite.com video URL.

        Parameters:
        - url: str, URL matching _VALID_URL

        Returns:
        dict: info dict with 'id', 'title' and 'url' keys
        """
        video_id = self._match_id(url)
        # Custom extraction logic here
        return {
            'id': video_id,
            'title': f'Video {video_id}',
            'url': f'https://customsite.com/stream/{video_id}.mp4',
        }


# Register custom extractor
with yt_dlp.YoutubeDL() as ydl:
    ydl.add_info_extractor(CustomSiteIE())
# Custom site URLs can now be extracted with this YoutubeDL instance

The extractor system supports over 1000 video platforms, including YouTube, Vimeo, Twitch, TikTok, Twitter/X, Instagram, and Facebook.
And hundreds more platforms across different regions and specialties.
# NOTE(review): these aliases use `Any` and `re`, which are not imported in
# this snippet -- presumably `from typing import Any` and `import re`.

# Base extractor type
InfoExtractor = type

# Extractor result information dictionary
ExtractorResult = dict[str, Any]

# URL pattern matching result (None when the pattern does not match)
URLMatch = re.Match[str] | None

Install with Tessl CLI
npx tessl i tessl/pypi-yt-dlp