Python API for retrieving YouTube video transcripts and subtitles without browser automation
npx @tessl/cli install tessl/pypi-youtube-transcript-api@1.2.0

A Python API for retrieving YouTube video transcripts and subtitles without requiring browser automation. Supports manually created and automatically generated subtitles, transcript translation, multiple output formats, and proxy configuration for working around IP restrictions.
pip install youtube-transcript-api

from youtube_transcript_api import YouTubeTranscriptApi

For specific functionality:
from youtube_transcript_api import (
    YouTubeTranscriptApi,
    TranscriptList,
    Transcript,
    FetchedTranscript,
    FetchedTranscriptSnippet,
    YouTubeTranscriptApiException
)

For formatters:
from youtube_transcript_api.formatters import (
    JSONFormatter,
    TextFormatter,
    SRTFormatter,
    WebVTTFormatter,
    PrettyPrintFormatter,
    FormatterLoader
)

For proxy configuration:
from youtube_transcript_api.proxies import (
    GenericProxyConfig,
    WebshareProxyConfig
)

from youtube_transcript_api import YouTubeTranscriptApi
# Simple transcript fetch
api = YouTubeTranscriptApi()
transcript = api.fetch('video_id')
# Process transcript data
for snippet in transcript:
    print(f"{snippet.start}: {snippet.text}")
# Get list of available transcripts
transcript_list = api.list('video_id')
for t in transcript_list:
    print(f"{t.language_code}: {t.language} ({'generated' if t.is_generated else 'manual'})")
# Fetch specific language with fallback
transcript = transcript_list.find_transcript(['es', 'en'])
fetched = transcript.fetch()
# Translate transcript
translated = transcript.translate('fr')
french_transcript = translated.fetch()

The library uses a hierarchical structure for transcript management:
This design enables efficient discovery of available transcripts, flexible language selection with fallbacks, and lazy loading of transcript content only when needed.
Main API class for retrieving transcripts with support for language selection, proxy configuration, and custom HTTP clients.
class YouTubeTranscriptApi:
    def __init__(self, proxy_config=None, http_client=None): ...
    def fetch(self, video_id, languages=("en",), preserve_formatting=False): ...
    def list(self, video_id): ...

Data classes for representing transcript lists, individual transcripts, and fetched content with timing information.
class TranscriptList:
    def find_transcript(self, language_codes): ...
    def find_generated_transcript(self, language_codes): ...
    def find_manually_created_transcript(self, language_codes): ...

class Transcript:
    def fetch(self, preserve_formatting=False): ...
    def translate(self, language_code): ...

class FetchedTranscript:
    def to_raw_data(self): ...

Classes for converting transcript data into various output formats including JSON, plain text, SRT subtitles, and WebVTT.
class JSONFormatter:
    def format_transcript(self, transcript, **kwargs): ...
    def format_transcripts(self, transcripts, **kwargs): ...

class SRTFormatter:
    def format_transcript(self, transcript, **kwargs): ...

class WebVTTFormatter:
    def format_transcript(self, transcript, **kwargs): ...

Classes for configuring HTTP proxies to work around IP blocking, including generic proxy support and specialized Webshare residential proxy integration.
class GenericProxyConfig:
    def __init__(self, http_url=None, https_url=None): ...

class WebshareProxyConfig:
    def __init__(self, proxy_username, proxy_password, **kwargs): ...

Comprehensive exception hierarchy for handling all error scenarios including video unavailability, IP blocking, missing transcripts, and translation errors.
class YouTubeTranscriptApiException(Exception): ...
class CouldNotRetrieveTranscript(YouTubeTranscriptApiException): ...
class VideoUnavailable(CouldNotRetrieveTranscript): ...
class TranscriptsDisabled(CouldNotRetrieveTranscript): ...
class NoTranscriptFound(CouldNotRetrieveTranscript): ...