Command-line program to download videos from YouTube.com and other video sites
Comprehensive utility functions for text processing, network operations, date parsing, file handling, and cross-platform compatibility. These utilities are used internally by youtube-dl and are available for external use.
Functions for cleaning and processing text content, filenames, and URLs.
def sanitize_filename(s, restricted=False, is_id=False):
"""
Sanitize filename for filesystem compatibility.
Parameters:
- s (str): Input filename string
- restricted (bool): Use ASCII-only characters
- is_id (bool): Whether string is a video ID
Returns:
str: Sanitized filename
"""
def sanitize_path(s):
"""
Sanitize complete file path.
Parameters:
- s (str): Input path string
Returns:
str: Sanitized path
"""
def sanitize_url(url):
"""
Sanitize URL for HTTP requests.
Parameters:
- url (str): Input URL
Returns:
str: Sanitized URL
"""
def clean_html(html):
"""
Clean HTML content by removing tags and entities.
Parameters:
- html (str): HTML content
Returns:
str: Plain text content
"""
def unescapeHTML(s):
"""
Unescape HTML entities in string.
Parameters:
- s (str): String with HTML entities
Returns:
str: Unescaped string
"""
Functions for handling file operations, data encoding, and format conversion.
def format_bytes(bytes):
"""
Format byte count as human-readable string.
Parameters:
- bytes (int): Byte count
Returns:
str: Formatted string (e.g., '1.46MiB')
"""
def parse_filesize(s):
"""
Parse file size string into bytes.
Parameters:
- s (str): Size string (e.g., '1.5GB', '500MB')
Returns:
int: Size in bytes, or None if invalid
"""
def determine_ext(url, default_ext='unknown_video'):
"""
Determine file extension from URL.
Parameters:
- url (str): File URL
- default_ext (str): Default extension if undetermined
Returns:
str: File extension
"""
def encodeFilename(s, for_subprocess=False):
"""
Encode filename for filesystem operations.
Parameters:
- s (str): Filename string
- for_subprocess (bool): Encoding for subprocess calls
Returns:
bytes/str: Encoded filename
"""
def expand_path(s):
"""
Expand user home directory and environment variables in path.
Parameters:
- s (str): Path string
Returns:
str: Expanded path
"""
Functions for parsing and formatting dates and time durations.
def unified_timestamp(date_str, day_first=True):
"""
Parse date string into Unix timestamp.
Parameters:
- date_str (str): Date string in various formats
- day_first (bool): Whether to interpret ambiguous dates as day-first
Returns:
int: Unix timestamp, or None if parsing fails
"""
def parse_iso8601(date_str, delimiter='T', colon=':'):
"""
Parse ISO 8601 date string.
Parameters:
- date_str (str): ISO 8601 date string
- delimiter (str): Date/time delimiter
- colon (str): Time component separator
Returns:
int: Unix timestamp
"""
def formatSeconds(secs, delim=':'):
"""
Format seconds as HH:MM:SS string.
Parameters:
- secs (int/float): Duration in seconds
- delim (str): Component delimiter
Returns:
str: Formatted duration string
"""
def parse_duration(s):
"""
Parse duration string into seconds.
Parameters:
- s (str): Duration string (e.g., '1:30', '90s', '1h30m')
Returns:
int: Duration in seconds
"""
Functions for handling HTTP requests, cookies, and network operations.
def sanitized_Request(url, *args, **kwargs):
"""
Create sanitized HTTP request object.
Parameters:
- url (str): Request URL
- *args: Additional positional arguments
- **kwargs: Additional keyword arguments
Returns:
Request: HTTP request object
"""
def make_HTTPS_handler(params, **kwargs):
"""
Create HTTPS handler with custom SSL context.
Parameters:
- params (dict): SSL parameters
- **kwargs: Additional SSL options
Returns:
HTTPSHandler: HTTPS handler instance
"""
def std_headers():
"""
Get standard HTTP headers for requests.
Returns:
dict: Standard headers dictionary
"""
class YoutubeDLCookieJar:
"""
Custom cookie jar implementation for youtube-dl.
"""
def __init__(self, filename=None):
"""
Initialize cookie jar.
Parameters:
- filename (str): Cookie file path
"""
Functions for working with data structures and collections.
def orderedSet(iterable):
"""
Create ordered set from iterable (preserves insertion order).
Parameters:
- iterable: Input iterable
Returns:
list: List with unique elements in order
"""
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
"""
Convert value to integer or return None.
Parameters:
- v: Input value
- scale (int): Scaling factor
- default: Default value if conversion fails
- get_attr (str): Attribute to extract from object
- invscale (int): Inverse scaling factor
Returns:
int/None: Converted integer or None
"""
def float_or_none(v, scale=1, invscale=1, default=None):
"""
Convert value to float or return None.
Parameters:
- v: Input value
- scale (float): Scaling factor
- invscale (float): Inverse scaling factor
- default: Default value if conversion fails
Returns:
float/None: Converted float or None
"""
def str_or_none(v, default=None):
"""
Convert value to string or return None.
Parameters:
- v: Input value
- default: Default value if conversion fails
Returns:
str/None: Converted string or None
"""
Functions for encryption, decryption, and security operations.
def aes_encrypt(data, key, iv):
"""
AES encryption function.
Parameters:
- data (bytes): Data to encrypt
- key (bytes): Encryption key
- iv (bytes): Initialization vector
Returns:
bytes: Encrypted data
"""
def aes_decrypt(data, key, iv):
"""
AES decryption function.
Parameters:
- data (bytes): Encrypted data
- key (bytes): Decryption key
- iv (bytes): Initialization vector
Returns:
bytes: Decrypted data
"""
def pkcs1pad(data, length):
"""
Apply PKCS#1 padding to data.
Parameters:
- data (bytes): Input data
- length (int): Target length
Returns:
bytes: Padded data
"""
Functions for handling cross-platform compatibility issues.
def preferredencoding():
"""
Get preferred text encoding for current platform.
Returns:
str: Encoding name (e.g., 'utf-8', 'cp1252')
"""
def write_string(s, out=None, encoding=None):
"""
Write string to output stream with proper encoding.
Parameters:
- s (str): String to write
- out: Output stream (default: sys.stdout)
- encoding (str): Text encoding
"""
def get_subprocess_encoding():
"""
Get appropriate encoding for subprocess operations.
Returns:
str: Encoding name
"""
def args_to_str(args):
"""
Convert argument list to command string.
Parameters:
- args (list): Argument list
Returns:
str: Command string
"""
Exception classes for different error conditions.
class YoutubeDLError(Exception):
"""Base class for youtube-dl errors."""
def __init__(self, msg=None):
"""
Initialize error with message.
Parameters:
- msg (str): Error message
"""
class ExtractorError(YoutubeDLError):
"""Error during information extraction."""
def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
"""
Initialize extractor error.
Parameters:
- msg (str): Error message
- tb (str): Traceback information
- expected (bool): Whether error was expected
- cause (Exception): Underlying exception
- video_id (str): Video identifier
- ie (str): Info extractor name
"""
class DownloadError(YoutubeDLError):
"""Error during file download."""
class PostProcessingError(YoutubeDLError):
"""Error during post-processing."""
class UnavailableVideoError(ExtractorError):
"""Video is not available."""
class ContentTooShortError(YoutubeDLError):
"""Downloaded content is shorter than expected."""
class GeoRestrictedError(ExtractorError):
"""Content is geo-restricted."""
class MaxDownloadsReached(YoutubeDLError):
"""Maximum download limit reached."""
from youtube_dl.utils import sanitize_filename
# Sanitize for cross-platform compatibility
safe_name = sanitize_filename("Video: Title with/special\\chars")
print(safe_name) # "Video - Title with_special_chars"
# ASCII-only mode
ascii_name = sanitize_filename("Vidéo: Título", restricted=True)
print(ascii_name) # "Video_-_Titulo"
from youtube_dl.utils import format_bytes, parse_filesize
# Format bytes as human-readable
print(format_bytes(1536000)) # "1.46MiB"
# Parse size strings
size = parse_filesize("1.5GB")
print(size) # 1500000000 (decimal "GB" = 1000**3; use "GiB" for 1024**3)
from youtube_dl.utils import unified_timestamp, formatSeconds
# Parse various date formats
timestamp = unified_timestamp("2021-12-17T15:30:00Z")
print(timestamp) # 1639755000
# Format duration
duration_str = formatSeconds(3665)
print(duration_str) # "1:01:05"
from youtube_dl.utils import int_or_none, float_or_none
# Safe integer conversion
width = int_or_none("1920") # 1920
invalid = int_or_none("invalid") # None
# Safe float conversion with scaling
bitrate = float_or_none("128", invscale=1000) # 128000.0
Install with Tessl CLI
npx tessl i tessl/pypi-youtube-dl