Python API for retrieving YouTube video transcripts and subtitles without browser automation
The main API class providing all functionality for retrieving YouTube transcripts. Supports proxy configuration, custom HTTP clients, language selection, and formatting options.
Main entry point for all transcript retrieval operations. Provides both simple one-step transcript fetching and detailed transcript list management.
class YouTubeTranscriptApi:
    """Main entry point for all transcript retrieval operations.

    Provides both simple one-step transcript fetching (``fetch``) and
    detailed transcript list management (``list``).
    """

    def __init__(self, proxy_config=None, http_client=None):
        """
        Initialize the YouTube Transcript API client.

        Args:
            proxy_config (ProxyConfig, optional): Proxy configuration for network requests
            http_client (requests.Session, optional): Custom HTTP client session

        Note:
            Not thread-safe due to requests.Session usage. Create separate instances per thread.
        """

    def fetch(self, video_id, languages=("en",), preserve_formatting=False):
        """
        Retrieve transcript for a single video (shortcut method).

        Args:
            video_id (str): YouTube video ID (not full URL)
            languages (tuple, optional): Language codes in priority order. Defaults to ("en",)
            preserve_formatting (bool, optional): Whether to keep HTML formatting. Defaults to False

        Returns:
            FetchedTranscript: The retrieved transcript with content

        Raises:
            VideoUnavailable: Video is no longer available
            TranscriptsDisabled: Subtitles are disabled for this video
            NoTranscriptFound: No transcript found for requested languages
            RequestBlocked: Requests blocked by YouTube (IP ban)
            InvalidVideoId: Invalid video ID provided
        """

    def list(self, video_id):
        """
        Get list of all available transcripts for a video.

        Args:
            video_id (str): YouTube video ID (not full URL)

        Returns:
            TranscriptList: Container with all available transcripts

        Raises:
            VideoUnavailable: Video is no longer available
            TranscriptsDisabled: Subtitles are disabled for this video
            RequestBlocked: Requests blocked by YouTube (IP ban)
            InvalidVideoId: Invalid video ID provided
        """


from youtube_transcript_api import YouTubeTranscriptApi
# Simple transcript fetch with default English
api = YouTubeTranscriptApi()
transcript = api.fetch('dQw4w9WgXcQ')
for snippet in transcript:
    print(f"[{snippet.start:.2f}s] {snippet.text}")
from youtube_transcript_api import YouTubeTranscriptApi
api = YouTubeTranscriptApi()
# Try Spanish first, then English as fallback
transcript = api.fetch('dQw4w9WgXcQ', languages=['es', 'en'])
# Or use the list method for more control
transcript_list = api.list('dQw4w9WgXcQ')
transcript = transcript_list.find_transcript(['es', 'en'])
fetched = transcript.fetch()
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.proxies import GenericProxyConfig
# Configure generic proxy
proxy_config = GenericProxyConfig(
http_url='http://proxy:8080',
https_url='https://proxy:8080'
)
api = YouTubeTranscriptApi(proxy_config=proxy_config)
transcript = api.fetch('dQw4w9WgXcQ')
from youtube_transcript_api import YouTubeTranscriptApi
import functools
import requests
# Use custom session with a default timeout.
# requests ignores a `timeout` attribute assigned to a Session; the timeout
# has to be supplied per request, so bind it onto the session's request
# method (which get()/post() delegate to) instead.
session = requests.Session()
session.request = functools.partial(session.request, timeout=30)
api = YouTubeTranscriptApi(http_client=session)
transcript = api.fetch('dQw4w9WgXcQ')
from youtube_transcript_api import YouTubeTranscriptApi
api = YouTubeTranscriptApi()
transcript = api.fetch('dQw4w9WgXcQ', preserve_formatting=True)
# Text may contain HTML tags like <b>, <i>, etc.
for snippet in transcript:
    print(snippet.text)  # May include HTML formatting
from typing import Iterable, Optional, Union
from requests import Session
# Type hints for main API
ProxyConfig = Union[GenericProxyConfig, WebshareProxyConfig]
LanguageCodes = Iterable[str]

Install with Tessl CLI
npx tessl i tessl/pypi-youtube-transcript-api