Python API for retrieving YouTube video transcripts and subtitles without browser automation
Classes for configuring HTTP proxies to work around IP blocking by YouTube. Includes generic proxy support and specialized integration with Webshare residential proxies.
Abstract base class defining the proxy configuration interface. All concrete proxy configs inherit from this class.
class ProxyConfig:
def to_requests_dict(self):
"""
Convert to requests library proxy format.
Returns:
RequestsProxyConfigDict: Dictionary with 'http' and 'https' keys
Raises:
NotImplementedError: Must be implemented by subclasses
"""
@property
def prevent_keeping_connections_alive(self):
"""
Whether to prevent keeping TCP connections alive.
Returns:
bool: True if connections should be closed after each request
"""
@property
def retries_when_blocked(self):
"""
Number of retries when requests are blocked.
Returns:
int: Number of retry attempts
"""

Configuration for any HTTP/HTTPS/SOCKS proxy. Supports both HTTP and HTTPS proxies with automatic fallback.
class GenericProxyConfig(ProxyConfig):
def __init__(self, http_url=None, https_url=None):
"""
Configure generic HTTP/HTTPS proxy.
Args:
http_url (str, optional): Proxy URL for HTTP requests
https_url (str, optional): Proxy URL for HTTPS requests
Raises:
InvalidProxyConfig: Neither http_url nor https_url provided
Note:
If only one URL is provided, it will be used for both HTTP and HTTPS.
Supports HTTP, HTTPS, and SOCKS proxies as per requests library format.
"""
def to_requests_dict(self):
"""
Convert to requests proxy dictionary.
Returns:
RequestsProxyConfigDict: Proxy configuration for requests library
"""
@property
def http_url(self):
"""str: HTTP proxy URL"""
@property
def https_url(self):
"""str: HTTPS proxy URL"""

Specialized configuration for Webshare residential proxies with rotating IP addresses. Optimized for working around YouTube's IP blocking.
class WebshareProxyConfig(GenericProxyConfig):
DEFAULT_DOMAIN_NAME = "p.webshare.io"
DEFAULT_PORT = 80
def __init__(self, proxy_username, proxy_password, filter_ip_locations=None,
retries_when_blocked=10, domain_name=DEFAULT_DOMAIN_NAME,
proxy_port=DEFAULT_PORT):
"""
Configure Webshare residential proxy with IP rotation.
Args:
proxy_username (str): Webshare proxy username
proxy_password (str): Webshare proxy password
filter_ip_locations (List[str], optional): Country codes to filter IPs
retries_when_blocked (int, optional): Retry attempts when blocked. Defaults to 10
domain_name (str, optional): Webshare domain. Defaults to "p.webshare.io"
proxy_port (int, optional): Proxy port. Defaults to 80
Note:
Requires Webshare "Residential" proxy package, not "Proxy Server" or "Static Residential".
Free tier uses "Proxy Server" and will not work reliably.
"""
@property
def url(self):
"""str: Complete proxy URL with rotation and location filtering"""
@property
def http_url(self):
"""str: HTTP proxy URL"""
@property
def https_url(self):
"""str: HTTPS proxy URL"""
@property
def prevent_keeping_connections_alive(self):
"""bool: Always True for rotating proxies to ensure IP rotation"""
@property
def retries_when_blocked(self):
"""int: Number of retry attempts when blocked"""
@property
def proxy_username(self):
"""str: Webshare proxy username"""
@property
def proxy_password(self):
"""str: Webshare proxy password"""
@property
def domain_name(self):
"""str: Webshare domain name"""
@property
def proxy_port(self):
"""int: Proxy port number"""

Exception classes for proxy configuration errors.
class InvalidProxyConfig(Exception):
"""
Raised when proxy configuration is invalid.
Args:
message (str): Error description
"""

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.proxies import GenericProxyConfig
# Option 1: supply only an HTTP proxy URL.
proxy_config = GenericProxyConfig(
    http_url="http://proxy.example.com:8080",
)

# Option 2: supply only an HTTPS proxy URL.
proxy_config = GenericProxyConfig(
    https_url="https://proxy.example.com:8080",
)

# Option 3: route HTTP and HTTPS traffic through separate proxies.
proxy_config = GenericProxyConfig(
    http_url="http://http-proxy.example.com:8080",
    https_url="https://https-proxy.example.com:8080",
)

# Hand the configuration (here, the last one assigned) to the API client.
api = YouTubeTranscriptApi(proxy_config=proxy_config)
transcript = api.fetch('dQw4w9WgXcQ')

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.proxies import GenericProxyConfig
# Unauthenticated SOCKS5 proxy, used for both HTTP and HTTPS traffic.
proxy_config = GenericProxyConfig(
    http_url="socks5://proxy.example.com:1080",
    https_url="socks5://proxy.example.com:1080",
)

# Same proxy, with credentials embedded in the URL.
proxy_config = GenericProxyConfig(
    http_url="socks5://username:password@proxy.example.com:1080",
    https_url="socks5://username:password@proxy.example.com:1080",
)

# The client uses whichever configuration was assigned last.
api = YouTubeTranscriptApi(proxy_config=proxy_config)
transcript = api.fetch('dQw4w9WgXcQ')

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.proxies import WebshareProxyConfig
# Minimal setup: credentials only, every other option left at its default.
proxy_config = WebshareProxyConfig(
    proxy_username="your_username",
    proxy_password="your_password",
)

# Restrict the IP pool to selected countries and raise the retry count.
proxy_config = WebshareProxyConfig(
    proxy_username="your_username",
    proxy_password="your_password",
    filter_ip_locations=["US", "CA", "GB"],
    retries_when_blocked=15,
)

# Override the default Webshare domain and port.
proxy_config = WebshareProxyConfig(
    proxy_username="your_username",
    proxy_password="your_password",
    domain_name="custom.webshare.io",
    proxy_port=8080,
)

# The client uses whichever configuration was assigned last.
api = YouTubeTranscriptApi(proxy_config=proxy_config)
transcript = api.fetch('dQw4w9WgXcQ')

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.proxies import GenericProxyConfig, InvalidProxyConfig
from youtube_transcript_api import RequestBlocked, IpBlocked
# 1) An invalid configuration is rejected when the config object is built.
try:
    # No URLs provided: GenericProxyConfig needs http_url and/or https_url
    proxy_config = GenericProxyConfig()
except InvalidProxyConfig as e:
    print(f"Invalid proxy config: {e}")

# 2) A valid proxy can still be blocked by YouTube when fetching.
try:
    proxy_config = GenericProxyConfig(http_url='http://proxy.example.com:8080')
    api = YouTubeTranscriptApi(proxy_config=proxy_config)
    transcript = api.fetch('dQw4w9WgXcQ')
except IpBlocked as e:
    # Catch the more specific IpBlocked first: in youtube-transcript-api it
    # derives from RequestBlocked, so listing it after RequestBlocked would
    # make this handler unreachable.
    print(f"IP blocked: {e}")
except RequestBlocked as e:
    print(f"Request blocked despite proxy: {e}")
    # The error message will include proxy-specific guidance

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.proxies import GenericProxyConfig
import requests
import functools

# Create custom session with proxy
proxy_config = GenericProxyConfig(http_url='http://proxy.example.com:8080')
session = requests.Session()
# Apply proxy configuration manually if needed
session.proxies.update(proxy_config.to_requests_dict())
# requests has no session-wide timeout setting: assigning `session.timeout`
# is silently ignored because timeouts are only honoured as a per-request
# argument. Bind a default onto the session's request method instead, so
# every call made through this session gets the 30-second limit (an explicit
# `timeout=` passed by a caller still takes precedence).
session.request = functools.partial(session.request, timeout=30)
# Pass both proxy config and custom session
api = YouTubeTranscriptApi(proxy_config=proxy_config, http_client=session)
transcript = api.fetch('dQw4w9WgXcQ')

from youtube_transcript_api.proxies import GenericProxyConfig, WebshareProxyConfig
# Inspect the requests-style mapping produced by a generic proxy config.
generic_config = GenericProxyConfig(http_url="http://proxy:8080")
proxy_dict = generic_config.to_requests_dict()
print(f"Generic proxy config: {proxy_dict}")

# Do the same for a Webshare config limited to US exit IPs, then show the
# extra properties it exposes.
webshare_config = WebshareProxyConfig(
    "user",
    "pass",
    filter_ip_locations=["US"],
)
webshare_dict = webshare_config.to_requests_dict()
print(f"Webshare proxy config: {webshare_dict}")
print(f"Webshare URL: {webshare_config.url}")
print(f"Retries when blocked: {webshare_config.retries_when_blocked}")
print(f"Close connections: {webshare_config.prevent_keeping_connections_alive}")

from typing import TypedDict, Optional, List, Union
class RequestsProxyConfigDict(TypedDict):
    """
    Proxy configuration dictionary for the requests library.

    Attributes:
        http (str): HTTP proxy URL
        https (str): HTTPS proxy URL
    """

    http: str
    https: str
# Union type for all proxy configurations
ProxyConfig = Union[GenericProxyConfig, WebshareProxyConfig]

Install with Tessl CLI
npx tessl i tessl/pypi-youtube-transcript-api